SkBlitRow_opts_arm_neon.cpp revision 3f55eed73f5af405909c2c10bff179d80526d423
1a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* 2a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Copyright 2012 The Android Open Source Project 3a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * 4a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Use of this source code is governed by a BSD-style license that can be 5a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * found in the LICENSE file. 6a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 7a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 8a111e492b84c312a6bd5d5d9ef100dca48f4941ddjsollen@google.com#include "SkBlitRow_opts_arm_neon.h" 9a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 10a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitMask.h" 11a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitRow.h" 12a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkColorPriv.h" 13a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkDither.h" 14a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkMathPriv.h" 15a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkUtils.h" 16a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 170060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org#include "SkColor_opts_neon.h" 18a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include <arm_neon.h> 19a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 20ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 21ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_3(const SkPMColor* SK_RESTRICT & src) { 22ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 23ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2; 24ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 25ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 
26ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 27ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 28ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 29ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 30ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 31ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [src] "+&r" (src) 32ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 33ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 34ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 35ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 36ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 37ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 38ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 39ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 40ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 41ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 42ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_4(const SkPMColor* SK_RESTRICT & src) { 43ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 44ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3; 45ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 46ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 47ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 48ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 49ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 50ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 
51ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc3].8b, v3.8b \t\n" 52ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 53ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3), 54ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [src] "+&r" (src) 55ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 56ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 57ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 58ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 59ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 60ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 61ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = vsrc_3; 62ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 63ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 64ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 65ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 66ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 670060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.orgvoid S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 680060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 690060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 700060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkASSERT(255 == alpha); 710060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 720060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count >= 8) { 730060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint8x8x4_t vsrc; 740060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint16x8_t vdst; 750060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 
760060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Load 77ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 78ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 79ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 800060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vsrc = vld4_u8((uint8_t*)src); 81ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit src += 8; 82ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 830060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 840060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Convert src to 565 85bc25dfc798fff225ce65355ecda19d2b85bd0e74commit-bot@chromium.org vdst = SkPixel32ToPixel16_neon8(vsrc); 860060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 870060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Store 880060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vst1q_u16(dst, vdst); 890060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 900060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Prepare next iteration 910060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst += 8; 920060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count -= 8; 930060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 940060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 950060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Leftovers 960060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count > 0) { 970060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColor c = *src++; 980060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColorAssert(c); 990060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org *dst = SkPixel32ToPixel16_ToU16(c); 1000060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst++; 
1010060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count--; 1020060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 1030060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org} 1040060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 10595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.orgvoid S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 10695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 10795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 10895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkASSERT(255 > alpha); 10995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vmask_blue, vscale; 11195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // prepare constants 11395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vscale = vdupq_n_u16(SkAlpha255To256(alpha)); 11495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vmask_blue = vmovq_n_u16(0x1F); 11595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org while (count >= 8) { 117ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 11895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 11995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vres_r, vres_g, vres_b; 12095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 12195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load src 122ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 123ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 
124ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 12595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org { 12695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 12795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 12895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 12995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 13095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 13195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org asm ( 13295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org "vld4.8 {d0-d3},[%[src]]!" 13395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 13495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : 13595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org ); 136ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 137ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 13995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 140ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 14195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 14295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load and unpack dst 14395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst = vld1q_u16(dst); 14495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes 14595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_b = vandq_u16(vdst, vmask_blue); // extract blue 14695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, 6+5); // extract red 
14795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green 14895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 149ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit // Shift src to 565 range 150ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 3); 151ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 2); 152ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 3); 15395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Scale src - dst 155ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_r = vmovl_u8(vsrc.val[NEON_R]) - vdst_r; 156ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_g = vmovl_u8(vsrc.val[NEON_G]) - vdst_g; 157ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_b = vmovl_u8(vsrc.val[NEON_B]) - vdst_b; 15895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r = vshrq_n_u16(vres_r * vscale, 8); 16095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g = vshrq_n_u16(vres_g * vscale, 8); 16195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vshrq_n_u16(vres_b * vscale, 8); 16295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r += vdst_r; 16495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g += vdst_g; 16595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b += vdst_b; 16695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Combine 16895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, 
vres_g, 5); // insert green into blue 16995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue 17095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 17195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Store 17295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vst1q_u16(dst, vres_b); 17395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org dst += 8; 17495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org count -= 8; 17595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 17695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org if (count > 0) { 17795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org int scale = SkAlpha255To256(alpha); 17895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org do { 17995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColor c = *src++; 18095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColorAssert(c); 18195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16_t d = *dst; 18295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org *dst++ = SkPackRGB16( 18395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale), 18495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale), 18595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale)); 18695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } while (--count != 0); 18795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 18895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org} 18995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32 
191a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 192a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 193a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 194a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 195a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 196a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= 8) { 197efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 199a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 200a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "ands ip, %[count], #7 \n\t" 201a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 202a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {q12}, [%[dst]] \n\t" 203a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld4.8 {d0-d3}, [%[src]] \n\t" 204a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // Thumb does not support the standard ARM conditional 205a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // instructions but instead requires the 'it' instruction 206a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // to signal conditional execution 207a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "it eq \n\t" 208a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "moveq ip, #8 \n\t" 209a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 210fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 211a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "add %[src], %[src], ip, LSL#2 \n\t" 212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "add %[dst], %[dst], ip, LSL#1 \n\t" 
213a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "subs %[count], %[count], ip \n\t" 214a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "b 9f \n\t" 215a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // LOOP 216a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "2: \n\t" 217fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 218a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {q12}, [%[dst]]! \n\t" 219a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld4.8 {d0-d3}, [%[src]]! \n\t" 220a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {q10}, [%[keep_dst]] \n\t" 221a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "sub %[keep_dst], %[dst], #8*2 \n\t" 222a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "subs %[count], %[count], #8 \n\t" 223a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "9: \n\t" 224a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "pld [%[dst],#32] \n\t" 225a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 226a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 227a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 228a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 229a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 230a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 231a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 232a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 233a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 234fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 235a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 236a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 
q13, d31 \n\t" 237a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 239a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 240a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 241a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 242a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 \n\t" 243a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 244a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 245a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 246a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 247a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 248a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 249a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 251a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 252fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 2533f55eed73f5af405909c2c10bff179d80526d423Mike Klein #ifdef SK_PMCOLOR_IS_RGBA 2543f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d6, d6, d0 \n\t" 255a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 256a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 2573f55eed73f5af405909c2c10bff179d80526d423Mike Klein #else 2583f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d6, d6, d2 \n\t" 2593f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d5, d5, d1 \n\t" 2603f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 
d4, d4, d0 \n\t" 2613f55eed73f5af405909c2c10bff179d80526d423Mike Klein #endif 262fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 263a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 264a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 265a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 266a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 267a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 268a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 269fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 270a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "bne 2b \n\t" 271fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 272a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "1: \n\t" 273a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {q10}, [%[keep_dst]] \n\t" 274a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 275fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 276a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 277a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 278a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 279a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 280a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 281fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com else 282a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com { // handle count < 8 283efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 
284fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 285a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 286a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 287a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 288fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 289a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 290a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 14f \n\t" 291a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d25}, [%[dst]]! \n\t" 292a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {q1}, [%[src]]! \n\t" 293fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 294a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "14: \n\t" 295a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 296a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 12f \n\t" 297a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d24[1]}, [%[dst]]! \n\t" 298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d1}, [%[src]]! \n\t" 299fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 300a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "12: \n\t" 301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 302a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 11f \n\t" 303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d24[1]}, [%[dst]]! \n\t" 304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d0[1]}, [%[src]]! 
\n\t" 305fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "11: \n\t" 307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // unzips achieve the same as a vld4 operation 308ea13afff6e46d8a969611cdd56c996bfb05a27c1thakis "vuzp.u16 q0, q1 \n\t" 309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d0, d1 \n\t" 310a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d2, d3 \n\t" 311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 314a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 316a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 318a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 320fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q13, d31 \n\t" 323a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 324fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 
\n\t" 329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 333a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 336a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 338fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 3393f55eed73f5af405909c2c10bff179d80526d423Mike Klein #ifdef SK_PMCOLOR_IS_RGBA 3403f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d6, d6, d0 \n\t" 341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 3433f55eed73f5af405909c2c10bff179d80526d423Mike Klein #else 3443f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d6, d6, d2 \n\t" 3453f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d5, d5, d1 \n\t" 3463f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d4, d4, d0 \n\t" 3473f55eed73f5af405909c2c10bff179d80526d423Mike Klein #endif 348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 349a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 350a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 351a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 352a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 
353a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 354a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 355fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 356a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // store 357a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 358a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 24f \n\t" 359a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d21}, [%[keep_dst]]! \n\t" 360fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 361a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "24: \n\t" 362a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 363a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 22f \n\t" 364a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.32 {d20[1]}, [%[keep_dst]]! \n\t" 365fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 366a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "22: \n\t" 367a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 368a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 21f \n\t" 369a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d20[1]}, [%[keep_dst]]! 
\n\t" 370fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 371a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "21: \n\t" 372a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 373a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 374a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 375a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 376a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 377a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 378a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 379a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 3800d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3810d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit#else // #ifdef SK_CPU_ARM32 3820d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3830d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petitvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 3840d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit const SkPMColor* SK_RESTRICT src, int count, 3850d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit U8CPU alpha, int /*x*/, int /*y*/) { 3860d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkASSERT(255 == alpha); 3870d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3880d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (count >= 16) { 3890d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit asm ( 3900d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "movi v4.8h, #0x80 \t\n" 3910d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3920d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "1: \t\n" 3930d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sub %[count], %[count], #16 \t\n" 3940d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ld1 
{v16.8h-v17.8h}, [%[dst]] \t\n" 3950d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n" 3960d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "prfm pldl1keep, [%[src],#512] \t\n" 3970d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "prfm pldl1keep, [%[dst],#256] \t\n" 3980d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v20.8h, v17.8h, #5 \t\n" 3990d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v31.8h, v16.8h, #5 \t\n" 4000d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn v6.8b, v31.8h \t\n" 4010d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn2 v6.16b, v20.8h \t\n" 4020d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v20.8h, v17.8h, #11 \t\n" 4030d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shl v19.16b, v6.16b, #2 \t\n" 4040d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v31.8h, v16.8h, #11 \t\n" 4050d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn v22.8b, v31.8h \t\n" 4060d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn2 v22.16b, v20.8h \t\n" 4070d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shl v18.16b, v22.16b, #3 \t\n" 4080d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mvn v3.16b, v3.16b \t\n" 4090d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn v16.8b, v16.8h \t\n" 4100d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v7.16b, v4.16b \t\n" 4110d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn2 v16.16b, v17.8h \t\n" 4120d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal v7.8h, v3.8b, v19.8b \t\n" 4130d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shl v16.16b, v16.16b, #3 \t\n" 4140d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v22.16b, v4.16b \t\n" 4150d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v24.8h, v7.8h, #6 \t\n" 4160d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal v22.8h, v3.8b, v18.8b \t\n" 4170d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v20.8h, v22.8h, #5 
\t\n" 4180d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn v20.8b, v22.8h, v20.8h \t\n" 4190d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "cmp %[count], #16 \t\n" 4200d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v6.16b, v4.16b \t\n" 4210d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v5.16b, v4.16b \t\n" 4220d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal v6.8h, v3.8b, v16.8b \t\n" 4230d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal2 v5.8h, v3.16b, v19.16b \t\n" 4240d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v17.16b, v4.16b \t\n" 4250d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v19.8h, v6.8h, #5 \t\n" 4260d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal2 v17.8h, v3.16b, v18.16b \t\n" 4270d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn v7.8b, v7.8h, v24.8h \t\n" 4280d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v18.8h, v5.8h, #6 \t\n" 4290d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v21.8h, v17.8h, #5 \t\n" 4300d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn2 v7.16b, v5.8h, v18.8h \t\n" 4310d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn2 v20.16b, v17.8h, v21.8h \t\n" 4320d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v22.16b, v4.16b \t\n" 4330d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn v6.8b, v6.8h, v19.8h \t\n" 4340d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal2 v22.8h, v3.16b, v16.16b \t\n" 4350d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v5.8h, v22.8h, #5 \t\n" 4360d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn2 v6.16b, v22.8h, v5.8h \t\n" 4370d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "uqadd v7.16b, v1.16b, v7.16b \t\n" 4380be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#if SK_PMCOLOR_BYTE_ORDER(B,G,R,A) 4390d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "uqadd v20.16b, v2.16b, v20.16b \t\n" 4400d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "uqadd 
v6.16b, v0.16b, v6.16b \t\n" 4410be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A) 4420be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit "uqadd v20.16b, v0.16b, v20.16b \t\n" 4430be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit "uqadd v6.16b, v2.16b, v6.16b \t\n" 4440be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#else 4450be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#error "This function only supports BGRA and RGBA." 4460be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#endif 4470d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll v22.8h, v20.8b, #8 \t\n" 4480d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll v5.8h, v7.8b, #8 \t\n" 4490d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v22.8h, v5.8h, #5 \t\n" 4500d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll v17.8h, v6.8b, #8 \t\n" 4510d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll2 v23.8h, v20.16b, #8 \t\n" 4520d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll2 v7.8h, v7.16b, #8 \t\n" 4530d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v22.8h, v17.8h, #11 \t\n" 4540d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v23.8h, v7.8h, #5 \t\n" 4550d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll2 v6.8h, v6.16b, #8 \t\n" 4560d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "st1 {v22.8h}, [%[dst]], #16 \t\n" 4570d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v23.8h, v6.8h, #11 \t\n" 4580d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "st1 {v23.8h}, [%[dst]], #16 \t\n" 4590d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "b.ge 1b \t\n" 4600d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count) 4610d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit :: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 4620d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", 
4630d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "v31" 4640d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit ); 4650d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4660d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit // Leftovers 4670d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (count > 0) { 4680d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit do { 4690d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkPMColor c = *src++; 4700d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkPMColorAssert(c); 4710d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (c) { 4720d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit *dst = SkSrcOver32To16(c, *dst); 4730d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4740d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit dst += 1; 4750d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } while (--count != 0); 4760d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4770d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit} 4780d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit#endif // #ifdef SK_CPU_ARM32 479a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 480402448d6818cab9d7b7633a0c18fcf574c915357mleestatic uint32_t pmcolor_to_expand16(SkPMColor c) { 481402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned r = SkGetPackedR32(c); 482402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned g = SkGetPackedG32(c); 483402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned b = SkGetPackedB32(c); 484402448d6818cab9d7b7633a0c18fcf574c915357mlee return (g << 24) | (r << 13) | (b << 2); 485402448d6818cab9d7b7633a0c18fcf574c915357mlee} 486402448d6818cab9d7b7633a0c18fcf574c915357mlee 487402448d6818cab9d7b7633a0c18fcf574c915357mleevoid Color32A_D565_neon(uint16_t dst[], SkPMColor src, int count, int x, int y) { 488402448d6818cab9d7b7633a0c18fcf574c915357mlee uint32_t src_expand; 489402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned scale; 490402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t vmask_blue; 
491402448d6818cab9d7b7633a0c18fcf574c915357mlee 492402448d6818cab9d7b7633a0c18fcf574c915357mlee if (count <= 0) return; 493402448d6818cab9d7b7633a0c18fcf574c915357mlee SkASSERT(((size_t)dst & 0x01) == 0); 494402448d6818cab9d7b7633a0c18fcf574c915357mlee 495402448d6818cab9d7b7633a0c18fcf574c915357mlee /* 496402448d6818cab9d7b7633a0c18fcf574c915357mlee * This preamble code is in order to make dst aligned to 8 bytes 497402448d6818cab9d7b7633a0c18fcf574c915357mlee * in the next mutiple bytes read & write access. 498402448d6818cab9d7b7633a0c18fcf574c915357mlee */ 499402448d6818cab9d7b7633a0c18fcf574c915357mlee src_expand = pmcolor_to_expand16(src); 500402448d6818cab9d7b7633a0c18fcf574c915357mlee scale = SkAlpha255To256(0xFF - SkGetPackedA32(src)) >> 3; 501402448d6818cab9d7b7633a0c18fcf574c915357mlee 502402448d6818cab9d7b7633a0c18fcf574c915357mlee#define DST_ALIGN 8 503402448d6818cab9d7b7633a0c18fcf574c915357mlee 504402448d6818cab9d7b7633a0c18fcf574c915357mlee /* 505402448d6818cab9d7b7633a0c18fcf574c915357mlee * preamble_size is in byte, meantime, this blend32_16_row_neon updates 2 bytes at a time. 
506402448d6818cab9d7b7633a0c18fcf574c915357mlee */ 507402448d6818cab9d7b7633a0c18fcf574c915357mlee int preamble_size = (DST_ALIGN - (size_t)dst) & (DST_ALIGN - 1); 508402448d6818cab9d7b7633a0c18fcf574c915357mlee 509402448d6818cab9d7b7633a0c18fcf574c915357mlee for (int i = 0; i < preamble_size; i+=2, dst++) { 510402448d6818cab9d7b7633a0c18fcf574c915357mlee uint32_t dst_expand = SkExpand_rgb_16(*dst) * scale; 511402448d6818cab9d7b7633a0c18fcf574c915357mlee *dst = SkCompact_rgb_16((src_expand + dst_expand) >> 5); 512402448d6818cab9d7b7633a0c18fcf574c915357mlee if (--count == 0) 513402448d6818cab9d7b7633a0c18fcf574c915357mlee break; 514402448d6818cab9d7b7633a0c18fcf574c915357mlee } 515402448d6818cab9d7b7633a0c18fcf574c915357mlee 516402448d6818cab9d7b7633a0c18fcf574c915357mlee int count16 = 0; 517402448d6818cab9d7b7633a0c18fcf574c915357mlee count16 = count >> 4; 518402448d6818cab9d7b7633a0c18fcf574c915357mlee vmask_blue = vmovq_n_u16(SK_B16_MASK); 519402448d6818cab9d7b7633a0c18fcf574c915357mlee 520402448d6818cab9d7b7633a0c18fcf574c915357mlee if (count16) { 521402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_sr; 522402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_sg; 523402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_sb; 524402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_256_sa; 525402448d6818cab9d7b7633a0c18fcf574c915357mlee 526402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sr = SkGetPackedR32(src); 527402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sg = SkGetPackedG32(src); 528402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sb = SkGetPackedB32(src); 529402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sa = SkGetPackedA32(src); 530402448d6818cab9d7b7633a0c18fcf574c915357mlee 531402448d6818cab9d7b7633a0c18fcf574c915357mlee // Operation: dst_rgb = src_rgb + ((256 - src_a) >> 3) x dst_rgb 532402448d6818cab9d7b7633a0c18fcf574c915357mlee // sr: 8-bit based, dr: 5-bit based, with dr x ((256-sa)>>3), 
5-bit left shifted, 533402448d6818cab9d7b7633a0c18fcf574c915357mlee //thus, for sr, do 2-bit left shift to match MSB : (8 + 2 = 5 + 5) 534402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_sr = vshlq_n_u16(vmovl_u8(vdup_n_u8(sr)), 2); // widen and src_red shift 535402448d6818cab9d7b7633a0c18fcf574c915357mlee 536402448d6818cab9d7b7633a0c18fcf574c915357mlee // sg: 8-bit based, dg: 6-bit based, with dg x ((256-sa)>>3), 5-bit left shifted, 537402448d6818cab9d7b7633a0c18fcf574c915357mlee //thus, for sg, do 3-bit left shift to match MSB : (8 + 3 = 6 + 5) 538402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_sg = vshlq_n_u16(vmovl_u8(vdup_n_u8(sg)), 3); // widen and src_grn shift 539402448d6818cab9d7b7633a0c18fcf574c915357mlee 540402448d6818cab9d7b7633a0c18fcf574c915357mlee // sb: 8-bit based, db: 5-bit based, with db x ((256-sa)>>3), 5-bit left shifted, 541402448d6818cab9d7b7633a0c18fcf574c915357mlee //thus, for sb, do 2-bit left shift to match MSB : (8 + 2 = 5 + 5) 542402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_sb = vshlq_n_u16(vmovl_u8(vdup_n_u8(sb)), 2); // widen and src blu shift 543402448d6818cab9d7b7633a0c18fcf574c915357mlee 544402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_256_sa = 545402448d6818cab9d7b7633a0c18fcf574c915357mlee vshrq_n_u16(vsubw_u8(vdupq_n_u16(256), vdup_n_u8(sa)), 3); // (256 - sa) >> 3 546402448d6818cab9d7b7633a0c18fcf574c915357mlee 547402448d6818cab9d7b7633a0c18fcf574c915357mlee while (count16-- > 0) { 548402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t vdst1, vdst1_r, vdst1_g, vdst1_b; 549402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t vdst2, vdst2_r, vdst2_g, vdst2_b; 550402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1 = vld1q_u16(dst); 551402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 8; 552402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2 = vld1q_u16(dst); 553402448d6818cab9d7b7633a0c18fcf574c915357mlee dst -= 8; //to store dst again. 
554402448d6818cab9d7b7633a0c18fcf574c915357mlee 555402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vshlq_n_u16(vdst1, SK_R16_BITS); // shift green to top of lanes 556402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_b = vdst1 & vmask_blue; // extract blue 557402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_r = vshrq_n_u16(vdst1, SK_R16_SHIFT); // extract red 558402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vshrq_n_u16(vdst1_g, SK_R16_BITS + SK_B16_BITS); // extract green 559402448d6818cab9d7b7633a0c18fcf574c915357mlee 560402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vshlq_n_u16(vdst2, SK_R16_BITS); // shift green to top of lanes 561402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_b = vdst2 & vmask_blue; // extract blue 562402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_r = vshrq_n_u16(vdst2, SK_R16_SHIFT); // extract red 563402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vshrq_n_u16(vdst2_g, SK_R16_BITS + SK_B16_BITS); // extract green 564402448d6818cab9d7b7633a0c18fcf574c915357mlee 565402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_r = vmlaq_u16(wide_sr, wide_256_sa, vdst1_r); // sr + (256-sa) x dr1 566402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vmlaq_u16(wide_sg, wide_256_sa, vdst1_g); // sg + (256-sa) x dg1 567402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_b = vmlaq_u16(wide_sb, wide_256_sa, vdst1_b); // sb + (256-sa) x db1 568402448d6818cab9d7b7633a0c18fcf574c915357mlee 569402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_r = vmlaq_u16(wide_sr, wide_256_sa, vdst2_r); // sr + (256-sa) x dr2 570402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vmlaq_u16(wide_sg, wide_256_sa, vdst2_g); // sg + (256-sa) x dg2 571402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_b = vmlaq_u16(wide_sb, wide_256_sa, vdst2_b); // sb + (256-sa) x db2 572402448d6818cab9d7b7633a0c18fcf574c915357mlee 573402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_r = vshrq_n_u16(vdst1_r, 5); // 5-bit right shift for 5-bit red 
574402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vshrq_n_u16(vdst1_g, 5); // 5-bit right shift for 6-bit green 575402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_b = vshrq_n_u16(vdst1_b, 5); // 5-bit right shift for 5-bit blue 576402448d6818cab9d7b7633a0c18fcf574c915357mlee 577402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1 = vsliq_n_u16(vdst1_b, vdst1_g, SK_G16_SHIFT); // insert green into blue 578402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1 = vsliq_n_u16(vdst1, vdst1_r, SK_R16_SHIFT); // insert red into green/blue 579402448d6818cab9d7b7633a0c18fcf574c915357mlee 580402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_r = vshrq_n_u16(vdst2_r, 5); // 5-bit right shift for 5-bit red 581402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vshrq_n_u16(vdst2_g, 5); // 5-bit right shift for 6-bit green 582402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_b = vshrq_n_u16(vdst2_b, 5); // 5-bit right shift for 5-bit blue 583402448d6818cab9d7b7633a0c18fcf574c915357mlee 584402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2 = vsliq_n_u16(vdst2_b, vdst2_g, SK_G16_SHIFT); // insert green into blue 585402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2 = vsliq_n_u16(vdst2, vdst2_r, SK_R16_SHIFT); // insert red into green/blue 586402448d6818cab9d7b7633a0c18fcf574c915357mlee 587402448d6818cab9d7b7633a0c18fcf574c915357mlee vst1q_u16(dst, vdst1); 588402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 8; 589402448d6818cab9d7b7633a0c18fcf574c915357mlee vst1q_u16(dst, vdst2); 590402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 8; 591402448d6818cab9d7b7633a0c18fcf574c915357mlee } 592402448d6818cab9d7b7633a0c18fcf574c915357mlee } 593402448d6818cab9d7b7633a0c18fcf574c915357mlee 594402448d6818cab9d7b7633a0c18fcf574c915357mlee count &= 0xF; 595402448d6818cab9d7b7633a0c18fcf574c915357mlee if (count > 0) { 596402448d6818cab9d7b7633a0c18fcf574c915357mlee do { 597402448d6818cab9d7b7633a0c18fcf574c915357mlee uint32_t dst_expand = SkExpand_rgb_16(*dst) * scale; 
598402448d6818cab9d7b7633a0c18fcf574c915357mlee *dst = SkCompact_rgb_16((src_expand + dst_expand) >> 5); 599402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 1; 600402448d6818cab9d7b7633a0c18fcf574c915357mlee } while (--count != 0); 601402448d6818cab9d7b7633a0c18fcf574c915357mlee } 602402448d6818cab9d7b7633a0c18fcf574c915357mlee} 603402448d6818cab9d7b7633a0c18fcf574c915357mlee 604be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.orgstatic inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { 605be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vdupq_n_u16(128); 606be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vshrq_n_u16(prod, 8); 607be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org return vshrq_n_u16(prod, 8); 608be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org} 609be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 610a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 611a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 612a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 613be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkASSERT(255 > alpha); 614a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 615be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org /* This code implements a Neon version of S32A_D565_Blend. The results have 616be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * a few mismatches compared to the original code. These mismatches never 617be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * exceed 1. 
618a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 619fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 620be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (count >= 8) { 621be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t valpha_max, vmask_blue; 622be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8_t valpha; 623be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 624be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare constants 625be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha_max = vmovq_n_u16(255); 626be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha = vdup_n_u8(alpha); 627be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vmask_blue = vmovq_n_u16(SK_B16_MASK); 628be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 629be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org do { 630be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 631be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vres_a, vres_r, vres_g, vres_b; 632be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8x4_t vsrc; 633be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 634be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // load pixels 635be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst = vld1q_u16(dst); 636ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 637ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 638ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 639be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) 640be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm ( 
641be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 %h[vsrc], [%[src]]!" 642be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+&r" (src) 643be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 644be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 645a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#else 646be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 647be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 648be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 649be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 650be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 651be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm volatile ( 652be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 {d0-d3},[%[src]]!;" 653be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), 654be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org [src] "+&r" (src) 655be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 656be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 657be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[0] = d0; 658be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[1] = d1; 659be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[2] = d2; 660be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[3] = d3; 661a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 662ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM64 663fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
664fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 665be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // deinterleave dst 666be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to top of lanes 667be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_b = vdst & vmask_blue; // extract blue 668be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red 669be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green 670be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 671be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // shift src to 565 672be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); 673be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 8 - SK_G16_BITS); 674be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 8 - SK_B16_BITS); 675be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 676be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // calc src * src_scale 677be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = vmull_u8(vsrc.val[NEON_A], valpha); 678be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmull_u8(vsrc.val[NEON_R], valpha); 679be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmull_u8(vsrc.val[NEON_G], valpha); 680be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmull_u8(vsrc.val[NEON_B], valpha); 681be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 682be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare dst_scale 
683be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = SkDiv255Round_neon8(vres_a); 684be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = valpha_max - vres_a; // 255 - (sa * src_scale) / 255 685be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 686be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // add dst * dst_scale to previous result 687be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmlaq_u16(vres_r, vdst_r, vres_a); 688be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmlaq_u16(vres_g, vdst_g, vres_a); 689be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmlaq_u16(vres_b, vdst_b, vres_a); 690be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 691be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#ifdef S32A_D565_BLEND_EXACT 692be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // It is possible to get exact results with this but it is slow, 693be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // even slower than C code in some cases 694be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = SkDiv255Round_neon8(vres_r); 695be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = SkDiv255Round_neon8(vres_g); 696be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = SkDiv255Round_neon8(vres_b); 697be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#else 698be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vrshrq_n_u16(vres_r, 8); 699be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vrshrq_n_u16(vres_g, 8); 700be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vrshrq_n_u16(vres_b, 8); 701be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#endif 702be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // pack result 
703be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_g, SK_G16_SHIFT); // insert green into blue 704be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, SK_R16_SHIFT); // insert red into green/blue 705be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 706be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // store 707be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vst1q_u16(dst, vres_b); 708be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 8; 709be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org count -= 8; 710be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } while (count >= 8); 711be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 712a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 713be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // leftovers 714be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org while (count-- > 0) { 715be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkPMColor sc = *src++; 716be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (sc) { 717be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16_t dc = *dst; 718be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); 719be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) + SkMulS16(SkGetPackedR16(dc), dst_scale); 720be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) + SkMulS16(SkGetPackedG16(dc), dst_scale); 721be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) + SkMulS16(SkGetPackedB16(dc), dst_scale); 
722be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); 723be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 724be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 1; 725a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 726a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 727a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 728a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* dither matrix for Neon, derived from gDitherMatrix_3Bit_16. 729a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * each dither value is spaced out into byte lanes, and repeated 730a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * to allow an 8-byte load from offsets 0, 1, 2 or 3 from the 731a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * start of each row. 732a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 733a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic const uint8_t gDitherMatrix_Neon[48] = { 734a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 735a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 736a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 737a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 738fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 739a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 740a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 741a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src, 742a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) 743a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 
744fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7454cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkASSERT(255 > alpha); 7464cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7474cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // rescale alpha to range 1 - 256 748a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int scale = SkAlpha255To256(alpha); 749fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7504cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count >= 8) { 7514cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org /* select row and offset for dither array */ 7524cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 753fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7544cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither = vld1_u8(dstart); // load dither values 7554cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. 
green dither values 756fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7574cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vscale = vdupq_n_s16(scale); // duplicate scale into neon reg 7584cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vmask_b = vdupq_n_u16(0x1F); // set up blue mask 759fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7604cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 7614cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 762ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 7634cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc_r, vsrc_g, vsrc_b; 7644cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b; 7654cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b; 7664cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b; 7674cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst; 7684cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst_r, vdst_g, vdst_b; 7694cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vres_r, vres_g, vres_b; 7704cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int8x8_t vres8_r, vres8_g, vres8_b; 7714cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7724cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load source and add dither 773ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 774ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 775ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 7764cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org { 7774cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d0 
asm("d0"); 7784cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d1 asm("d1"); 7794cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d2 asm("d2"); 7804cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d3 asm("d3"); 7814cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7824cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org asm ( 783ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! " 7844cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 7854cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : 7864cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org ); 787ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 788ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 789ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 7904cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 791ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 792ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_r = vsrc.val[NEON_R]; 793ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_g = vsrc.val[NEON_G]; 794ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_b = vsrc.val[NEON_B]; 7954cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7964cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6 7974cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5 7984cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. 
blue >> 5 7994cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8004cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen 8014cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vaddl_u8(vsrc_r, vdither); // add in dither to red and widen 8024cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vaddl_u8(vsrc_b, vdither); // add in dither to blue and widen 8034cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8044cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r); // sub shifted red from result 8054cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vsubw_u8(vsrc_dit_g, vsrc565_g); // sub shifted green from result 8064cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vsubw_u8(vsrc_dit_b, vsrc565_b); // sub shifted blue from result 8074cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8084cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_r = vshrq_n_u16(vsrc_dit_r, 3); 8094cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_g = vshrq_n_u16(vsrc_dit_g, 2); 8104cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_b = vshrq_n_u16(vsrc_dit_b, 3); 8114cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8124cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load dst and unpack 8134cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst = vld1q_u16(dst); 8144cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_g = vshrq_n_u16(vdst, 5); // shift down to get green 8154cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_r = vshrq_n_u16(vshlq_n_u16(vdst, 5), 5+5); // double shift to extract red 8164cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_b = 
vandq_u16(vdst, vmask_b); // mask to get blue 8174cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8184cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // subtract dst from src and widen 8194cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_r), vreinterpretq_s16_u16(vdst_r)); 8204cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_g), vreinterpretq_s16_u16(vdst_g)); 8214cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_b), vreinterpretq_s16_u16(vdst_b)); 8224cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8234cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // multiply diffs by scale and shift 8244cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vmulq_s16(vres_r, vscale); 8254cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vmulq_s16(vres_g, vscale); 8264cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vmulq_s16(vres_b, vscale); 8274cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8284cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_r = vshrn_n_s16(vres_r, 8); 8294cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_g = vshrn_n_s16(vres_g, 8); 8304cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_b = vshrn_n_s16(vres_b, 8); 8314cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8324cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // add dst to result 8334cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vaddw_s8(vreinterpretq_s16_u16(vdst_r), vres8_r); 8344cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vaddw_s8(vreinterpretq_s16_u16(vdst_g), vres8_g); 8354cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 
vres_b = vaddw_s8(vreinterpretq_s16_u16(vdst_b), vres8_b); 8364cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8374cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // put result into 565 format 8384cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_g, 5); // shift up green and insert into blue 8394cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_r, 6+5); // shift up red and insert into blue 8404cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8414cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Store result 8424cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vst1q_u16(dst, vreinterpretq_u16_s16(vres_b)); 8434cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8444cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Next iteration 8454cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org dst += 8; 8464cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org count -= 8; 8474cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8484cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (count >= 8); 8494cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 8504cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8514cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Leftovers 8524cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count > 0) { 8534cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int scale = SkAlpha255To256(alpha); 8544cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_565_SCAN(y); 8554cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 8564cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColor c = *src++; 8574cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColorAssert(c); 
8584cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8594cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int dither = DITHER_VALUE(x); 8604cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sr = SkGetPackedR32(c); 8614cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sg = SkGetPackedG32(c); 8624cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sb = SkGetPackedB32(c); 8634cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sr = SkDITHER_R32To565(sr, dither); 8644cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sg = SkDITHER_G32To565(sg, dither); 8654cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sb = SkDITHER_B32To565(sb, dither); 8664cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8674cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16_t d = *dst; 8684cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale), 8694cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sg, SkGetPackedG16(d), scale), 8704cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sb, SkGetPackedB16(d), scale)); 8714cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_INC_X(x); 8724cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (--count != 0); 873a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 874a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 875a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com

/* Src-over blend of two packed 32-bit pixels held in one 64-bit NEON register.
 *
 *   src_px    : two source pixels (8 bytes)
 *   dst_px    : the two destination pixels they are drawn over
 *   alpha_idx : table-lookup indices {3,3,3,3,7,7,7,7}; they replicate byte 3
 *               of each pixel (the byte used as alpha here) across that
 *               pixel's four lanes
 *
 * Per byte lane the result is  src + ((dst * (256 - alpha)) >> 8).
 * The final add is a plain wrapping add: byte-lane overflow is ignored.
 */
static inline uint8x8_t S32A_srcover_pair_neon(uint8x8_t src_px,
                                               uint8x8_t dst_px,
                                               uint8x8_t alpha_idx) {
    /* spread each pixel's alpha over its own four lanes */
    const uint8x8_t alpha = vtbl1_u8(src_px, alpha_idx);

    /* destination scale: (255 - a) + 1 collapsed into 256 - a */
    const uint16x8_t dst_scale = vsubw_u8(vdupq_n_u16(256), alpha);

    /* widen dst, multiply by the scale, narrow back with >> 8 */
    const uint16x8_t dst_scaled = vmulq_u16(vmovl_u8(dst_px), dst_scale);
    const uint8x8_t dst_cooked = vshrn_n_u16(dst_scaled, 8);

    return vadd_u8(src_px, dst_cooked);
}

/* Blends `count` source pixels over `dst` (src-over).
 * `alpha` must be 255 (asserted); it is not otherwise used.
 * Pixels are handled four at a time with NEON, and the remaining 0-3 pixels
 * go through SkPMSrcOver().
 */
void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
                                const SkPMColor* SK_RESTRICT src,
                                int count, U8CPU alpha) {
    SkASSERT(255 == alpha);

    if (count <= 0) {
        return;
    }

    static const uint8_t alpha_byte_index[] = {3,3,3,3,7,7,7,7};
    const uint8x8_t alpha_mask = vld1_u8(alpha_byte_index);

    /* NEON path: UNROLL pixels per iteration */
#define UNROLL 4
    for (; count >= UNROLL; count -= UNROLL, src += UNROLL, dst += UNROLL) {
        /* The two prefetches below may make the code slightly slower for
         * small values of count but are worth having in the general case.
         */
        __builtin_prefetch(src + 32);
        __builtin_prefetch(dst + 32);

        /* load every source and destination pixel before storing anything */
        const uint8x8_t src_lo = vreinterpret_u8_u32(vld1_u32(src));
#if UNROLL > 2
        const uint8x8_t src_hi = vreinterpret_u8_u32(vld1_u32(src + 2));
#endif
        const uint8x8_t dst_lo = vreinterpret_u8_u32(vld1_u32(dst));
#if UNROLL > 2
        const uint8x8_t dst_hi = vreinterpret_u8_u32(vld1_u32(dst + 2));
#endif

        /* pixels 0-1, then pixels 2-3 */
        const uint8x8_t out_lo = S32A_srcover_pair_neon(src_lo, dst_lo, alpha_mask);
#if UNROLL > 2
        const uint8x8_t out_hi = S32A_srcover_pair_neon(src_hi, dst_hi, alpha_mask);
#endif

        vst1_u32(dst, vreinterpret_u32_u8(out_lo));
#if UNROLL > 2
        vst1_u32(dst + 2, vreinterpret_u32_u8(out_hi));
#endif
    }
#undef UNROLL

    /* whatever is left over (fewer than four pixels) */
    for (; count > 0; --count) {
        *dst = SkPMSrcOver(*src, *dst);
        ++src;
        ++dst;
    }
}

/* Src-over blit that switches between three loops according to the source
 * alpha it is currently seeing:
 *
 *   ALPHA_0        - source pixel compares <= 0x00FFFFFF: dst is left alone
 *   ALPHA_255      - source pixel compares >= 0xFF000000: src is copied
 *   ALPHA_1_TO_254 - anything else: NEON blend, four pixels per pass
 *
 * The three loops only run while src is below `src_end`, which sits
 * UNROLL + 1 pixels before the real end of the row; the last pixels are
 * finished one by one under TAIL.
 * `alpha` must be 255 (asserted); it is not otherwise used.
 */
void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
                                          const SkPMColor* SK_RESTRICT src,
                                          int count, U8CPU alpha) {
    SkASSERT(255 == alpha);

    if (count <= 0) {
        return;
    }

    /* Thresholds used to classify a source pixel as opaque / transparent */
    const unsigned int ALPHA_OPAQ  = 0xFF000000;
    const unsigned int ALPHA_TRANS = 0x00FFFFFF;

#define UNROLL 4
    /* limit for the unrolled loops; moved forward to the real end in TAIL */
    const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1);

    /* NEON setup: indices that replicate byte 3 of each pixel */
    static const uint8_t alpha_byte_index[] = {3,3,3,3,7,7,7,7};
    const uint8x8_t alpha_mask = vld1_u8(alpha_byte_index);

    /* pick the loop to start in */
    if (src >= src_end) {
        goto TAIL;
    }
    if (*src <= ALPHA_TRANS) {
        goto ALPHA_0;
    }
    if (*src >= ALPHA_OPAQ) {
        goto ALPHA_255;
    }
    /* otherwise fall through */

ALPHA_1_TO_254:
    do {
        /* load the four source and four destination pixels */
        const uint8x8_t src_lo = vreinterpret_u8_u32(vld1_u32(src));
        const uint8x8_t src_hi = vreinterpret_u8_u32(vld1_u32(src + 2));
        const uint8x8_t dst_lo = vreinterpret_u8_u32(vld1_u32(dst));
        const uint8x8_t dst_hi = vreinterpret_u8_u32(vld1_u32(dst + 2));

        /* blend both pairs, then write them back */
        const uint8x8_t out_lo = S32A_srcover_pair_neon(src_lo, dst_lo, alpha_mask);
        const uint8x8_t out_hi = S32A_srcover_pair_neon(src_hi, dst_hi, alpha_mask);

        vst1_u32(dst, vreinterpret_u32_u8(out_lo));
        vst1_u32(dst + 2, vreinterpret_u32_u8(out_hi));

        src += UNROLL;
        dst += UNROLL;

        /* if the next two pixels are both transparent or both opaque,
           one of the specialised loops is likely the better choice */
        const bool next_two_clear = (src[0] <= ALPHA_TRANS) && (src[1] <= ALPHA_TRANS);
        const bool next_two_solid = (src[0] >= ALPHA_OPAQ) && (src[1] >= ALPHA_OPAQ);
        if (next_two_clear || next_two_solid) {
            break;
        }
    } while (src < src_end);

    if (src >= src_end) {
        goto TAIL;
    }
    if ((src[0] >= ALPHA_OPAQ) && (src[1] >= ALPHA_OPAQ)) {
        goto ALPHA_255;
    }
    /* otherwise fall through */

ALPHA_0:
    {
        /* The current pixel is transparent; keep stepping over pixels for
           as long as they stay transparent. dst is advanced once at the end
           instead of on every step. */
        const SkPMColor* run_start = src;
        bool hit_visible = false;

        do {
            for (int step = 0; step < UNROLL; ++step) {
                if (*(++src) > ALPHA_TRANS) {
                    hit_visible = true;
                    break;
                }
            }
        } while (!hit_visible && (src < src_end));

        dst += (src - run_start);
    }

    /* the transparent run is over; decide where to continue */
    if (src >= src_end) {
        goto TAIL;
    }
    if (*src >= ALPHA_OPAQ) {
        goto ALPHA_255;
    }
    goto ALPHA_1_TO_254;

ALPHA_255:
    /* copy four pixels at a time while all four are opaque */
    while ((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) {
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
        dst[3] = src[3];
        src += UNROLL;
        dst += UNROLL;
        if (src >= src_end) {
            goto TAIL;
        }
    }

    /* copy up to three more pixels, stopping at the first non-opaque one */
    for (int copied = 0; (copied < UNROLL - 1) && (*src >= ALPHA_OPAQ); ++copied) {
        *dst++ = *src++;
    }

    if (src >= src_end) {
        goto TAIL;
    }
    if (*src <= ALPHA_TRANS) {
        goto ALPHA_0;
    }
    goto ALPHA_1_TO_254;

TAIL:
    /* finish the remaining pixels one at a time */
    src_end += UNROLL + 1;  /* move to the real end of the row */
    for (; src != src_end; ++src, ++dst) {
        const SkPMColor c = *src;
        if (c == 0) {
            continue;  /* nothing to draw */
        }
        *dst = (c >= ALPHA_OPAQ) ? c : SkPMSrcOver(c, *dst);
    }

#undef UNROLL
    return;
}

1151a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1152a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* Neon version of S32_Blend_BlitRow32() 1153a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * portable version is in src/core/SkBlitRow_D32.cpp 1154a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 1155a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 1156a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1157a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha) { 1158a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(alpha <= 255); 1159fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1160374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org if (count <= 0) { 1161374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org return; 1162374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org } 1163dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 1164374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16_t src_scale = SkAlpha255To256(alpha); 1165374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16_t dst_scale = 256 - src_scale; 1166dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 1167374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org while
(count >= 2) { 1168374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint8x8_t vsrc, vdst, vres; 1169374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16x8_t vsrc_wide, vdst_wide; 1170dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 1171374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org /* These commented prefetches are a big win for count 1172374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org * values > 64 on an A9 (Pandaboard) but hurt by 10% for count = 4. 1173374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org * They also hurt a little (<5%) on an A15 1174374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org */ 1175374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org //__builtin_prefetch(src+32); 1176374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org //__builtin_prefetch(dst+32); 1177dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 1178374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Load 1179374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 1180374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 1181374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 1182374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process src 1183374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 1184374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); 1185374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 1186374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process dst 1187374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); 1188374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 
1189374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Combine 1190374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 1191374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 1192374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Store 1193374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 1194374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 1195374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org src += 2; 1196374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org dst += 2; 1197374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org count -= 2; 1198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1199fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1200fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com if (count == 1) { 1201374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 1202374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16x8_t vsrc_wide, vdst_wide; 1203dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 1204374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Load 1205374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 1206374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 1207374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 1208374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process 1209374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 1210374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmulq_u16(vsrc_wide, 
vdupq_n_u16(src_scale)); 1211374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); 1212374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 1213374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 1214374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Store 1215374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 1216a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1217a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1218a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 12193a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 12201fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.orgvoid S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 12211fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, 12221fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org int count, U8CPU alpha) { 12231fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12241fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org SkASSERT(255 >= alpha); 12251fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12261fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count <= 0) { 12271fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org return; 12281fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 12291fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12301fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned alpha256 = SkAlpha255To256(alpha); 12311fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12321fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // First deal with odd counts 12331fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count 
& 1) { 12341fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 12351fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide; 12361fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned dst_scale; 12371fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12381fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 12391fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 12401fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 12411fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12421fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 12431fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = vget_lane_u8(vsrc, 3); 12441fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale *= alpha256; 12451fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale >>= 8; 12461fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = 256 - dst_scale; 12471fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12481fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 12491fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 12501fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); 12511fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12521fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 12531fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 12541fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmulq_n_u16(vdst_wide, 
dst_scale); 12551fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12561fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 12571fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 12581fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12591fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 12601fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst++; 12611fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src++; 12621fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count--; 12631fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 12641fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12651fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count) { 12661fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t alpha_mask; 12671fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 12681fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org alpha_mask = vld1_u8(alpha_mask_setup); 12691fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12701fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org do { 12711fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12721fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc, vdst, vres, vsrc_alphas; 12731fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; 12741fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12751fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(src+32); 12761fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(dst+32); 
12771fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12781fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 12791fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 12801fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 12811fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12821fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Prepare src_scale 12831fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_scale = vdupq_n_u16(alpha256); 12841fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12851fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 12861fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); 12871fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vmovl_u8(vsrc_alphas); 12881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale *= vsrc_scale; 12891fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vshrq_n_u16(vdst_scale, 8); 12901fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale); 12911fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12921fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 12931fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 12941fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide *= vsrc_scale; 12951fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 12961fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 12971fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 12981fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide *= vdst_scale; 
12991fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 13001fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 13011fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 13021fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 13031fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 13041fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 13051fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src += 2; 13061fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst += 2; 13071fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count -= 2; 13081fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } while(count); 13091fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 13101fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org} 13111fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 1312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1314fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef DEBUG_OPAQUE_DITHER 1315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1316fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if defined(DEBUG_OPAQUE_DITHER) 1317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme8(char *str, void *p, int len) 1318a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 1319fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static char buf[256]; 1320fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char tbuf[32]; 1321fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i; 1322fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char *pc = (char*) p; 
1323fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(buf,"%8s:", str); 1324fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for(i=0;i<len;i++) { 1325fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(tbuf, " %02x", pc[i]); 1326fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com strcat(buf, tbuf); 1327fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1328fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("%s\n", buf); 1329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme16(char *str, void *p, int len) 1331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 1332fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static char buf[256]; 1333fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char tbuf[32]; 1334fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i; 1335fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t *pc = (uint16_t*) p; 1336fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(buf,"%8s:", str); 1337fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com len = (len / sizeof(uint16_t)); /* passed as bytes */ 1338fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for(i=0;i<len;i++) { 1339fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(tbuf, " %04x", pc[i]); 1340fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com strcat(buf, tbuf); 1341fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1342fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("%s\n", buf); 1343a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1344a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1345ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM32 1346a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1347a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, 1348a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1349a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1350a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1351a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1352fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1353a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1354a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1355fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1356fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1357fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t tmpbuf[UNROLL]; 1358fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int td[UNROLL]; 1359fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int tdv[UNROLL]; 1360fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int ta[UNROLL]; 1361fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int tap[UNROLL]; 1362fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t in_dst[UNROLL]; 1363fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int offset = 0; 1364fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int noisy = 0; 1365a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1366a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1367fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint8x8_t dbase; 1368fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1369fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dbase = vld1_u8(dstart); 1370a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1371a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1372ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1373fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t sr, sg, sb, sa, d; 1374fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8, scale8, alpha8; 1375fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst_r, dst_g, dst_b; 1376fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1377fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1378fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // calculate 8 elements worth into a temp buffer 1379fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org { 1380fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int my_y = y; 1381fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int my_x = x; 1382fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkPMColor* my_src = (SkPMColor*)src; 1383fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint16_t* my_dst = dst; 1384fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int i; 1385fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1386fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org DITHER_565_SCAN(my_y); 1387fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for(i = 0; i < UNROLL; i++) { 1388a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *my_src++; 1389a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1390a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1391a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1392fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1393a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(DITHER_VALUE(my_x), SkAlpha255To256(a)); 
1394fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tdv[i] = DITHER_VALUE(my_x); 1395fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ta[i] = a; 1396fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tap[i] = SkAlpha255To256(a); 1397fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org td[i] = d; 1398fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1399a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1400a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1401a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1402a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1403a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1404a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1405fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1406a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1407a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*my_dst); 1408a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1409a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1410a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com tmpbuf[i] = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1411fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org td[i] = d; 1412a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } else { 1413fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tmpbuf[i] = *my_dst; 1414fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ta[i] = tdv[i] = td[i] = 0xbeef; 
1415fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1416fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org in_dst[i] = *my_dst; 1417a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com my_dst += 1; 1418a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(my_x); 1419fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1420fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1421a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1422a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1423ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1424ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 1425ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1426fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1427fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1428fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1429fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1430fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1431a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1432ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ("vld4.8 {d0-d3},[%[src]]! 
" 1433fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src) 1434fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : 1435fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ); 1436ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1437ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1438ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1439ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = d3; 1440fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1441ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1442ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sa = vsrc.val[NEON_A]; 1443ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1444ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1445ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1446a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1447fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org /* calculate 'd', which will be 0..7 1448fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * dbase[] is 0..7; alpha is 0..256; 16 bits suffice 1449fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1450fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmovl_u8(dbase); 1451fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmlal_u8(alpha8, sa, dbase); 1452fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org d = vshrn_n_u16(alpha8, 8); // narrowing too 1453fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1454fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sr = sr - (sr>>5) + d 1455fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* watching for 8-bit overflow. 
d is 0..7; risky range of 1456fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * sr is >248; and then (sr>>5) is 7 so it offsets 'd'; 1457fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * safe as long as we do ((sr-sr>>5) + d) 1458fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1459fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1460fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vadd_u8(sr, d); 1461fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1462fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sb = sb - (sb>>5) + d 1463fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1464fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vadd_u8(sb, d); 1465fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1466fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1467fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1468fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vadd_u8(sg, vshr_n_u8(d,1)); 1469fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1470fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // need to pick up 8 dst's -- at 16 bits each, 128 bits 1471fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vld1q_u16(dst); 1472fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_b = vandq_u16(dst8, vdupq_n_u16(SK_B16_MASK)); 1473fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_g = vshrq_n_u16(vshlq_n_u16(dst8, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS); 1474fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_r = vshrq_n_u16(dst8, SK_R16_SHIFT); // clearing hi bits 1475fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 
1476fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // blend 1477fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vsubw_u8(vdupq_n_u16(256), sa); 1478a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1479fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // combine the addq and mul, save 3 insns 1480fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vshrq_n_u16(scale8, 3); 1481fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_b = vmlaq_u16(vshll_n_u8(sb,2), dst_b, scale8); 1482fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8); 1483fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8); 1484a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1485fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // repack to store 1486fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst8 = vshrq_n_u16(dst_b, 5); 1487fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5); 1488fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11); 1489fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1490fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1491fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1492fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1493fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // verify my 8 elements match the temp buffer 1494fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org { 1495fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int i, bad=0; 1496fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org static int invocation; 1497a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1498fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for (i = 0; i < UNROLL; i++) { 1499fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org if (tmpbuf[i] != dst[i]) { 1500fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org bad=1; 1501fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1502fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1503fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org if (bad) { 1504fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf("BAD S32A_D565_Opaque_Dither_neon(); invocation %d offset %d\n", 1505fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org invocation, offset); 1506fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf(" alpha 0x%x\n", alpha); 1507fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for (i = 0; i < UNROLL; i++) 1508fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf("%2d: %s %04x w %04x id %04x s %08x d %04x %04x %04x %04x\n", 1509fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org i, ((tmpbuf[i] != dst[i])?"BAD":"got"), dst[i], tmpbuf[i], 1510fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org in_dst[i], src[i-8], td[i], tdv[i], tap[i], ta[i]); 1511fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1512fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("alpha8", &alpha8, sizeof(alpha8)); 1513fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("scale8", &scale8, sizeof(scale8)); 1514fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("d", &d, sizeof(d)); 1515fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst8", &dst8, sizeof(dst8)); 1516fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_b", &dst_b, sizeof(dst_b)); 1517fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_g", 
&dst_g, sizeof(dst_g)); 1518fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_r", &dst_r, sizeof(dst_r)); 1519fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sb", &sb, sizeof(sb)); 1520fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sg", &sg, sizeof(sg)); 1521fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sr", &sr, sizeof(sr)); 1522fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1523fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org return; 1524fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1525fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org offset += UNROLL; 1526fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org invocation++; 1527fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1528fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#endif 1529fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst += UNROLL; 1530fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 1531fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // skip x += UNROLL, since it's unchanged mod-4 1532a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (count >= UNROLL); 1533a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1534fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1535a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1536fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // residuals 1537a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1538a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1539a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1540a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1541a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 
1542a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1543a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1544fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1545a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither and alpha are just temporary variables to work-around 1546a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // an ICE in debug. 1547a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1548a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned alpha = SkAlpha255To256(a); 1549a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(dither, alpha); 1550fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1551a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1552a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1553a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1554a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1555a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1556a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1557fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1558a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1559a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1560a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1561a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1562a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst = SkCompact_rgb_16((src_expanded + dst_expanded) 
>> 5); 1563a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1564a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst += 1; 1565a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1566a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1567a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1568a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1569a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1570a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1571a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1572fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef DEBUG_S32_OPAQUE_DITHER 1573a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1574a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, 1575a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1576a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1577a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1578a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1579fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1580a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1581fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t d; 1582fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1583fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com d = vld1_u8(dstart); 1584fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1585fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com while (count >= UNROLL) { 1586efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint8x8_t sr, sg, sb; 
1587efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16x8_t dr, dg, db; 1588fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8; 1589ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1590fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1591ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1592ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 1593ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1594fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1595fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1596fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1597fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1598fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1599fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1600688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org asm ( 1601ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! 
" 1602688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 1603688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : 1604688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org ); 1605ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1606ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1607ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1608fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1609ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1610ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1611ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1612ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1613ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 1614fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* XXX: if we want to prefetch, hide it in the above asm() 1615fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * using the gcc __builtin_prefetch(), the prefetch will 1616fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * fall to the bottom of the loop -- it won't stick up 1617fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * at the top of the loop, just after the vld4. 
1618fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com */ 1619fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1620688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sr = sr - (sr>>5) + d 1621fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1622fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dr = vaddl_u8(sr, d); 1623fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1624688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sb = sb - (sb>>5) + d 1625fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1626fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com db = vaddl_u8(sb, d); 1627fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1628688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1629fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1630688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dg = vaddl_u8(sg, vshr_n_u8(d, 1)); 1631fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1632688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // pack high bits of each into 565 format (rgb, b is lsb) 1633fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vshrq_n_u16(db, 3); 1634fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5); 1635688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11); 1636fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1637688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // store it 1638fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1639fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
1640fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if defined(DEBUG_S32_OPAQUE_DITHER) 1641688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // always good to know if we generated good results 1642fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1643fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i, myx = x, myy = y; 1644fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com DITHER_565_SCAN(myy); 1645fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for (i=0;i<UNROLL;i++) { 1646688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // the '!' in the asm block above post-incremented src by the 8 pixels it reads. 1647688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org SkPMColor c = src[i-8]; 1648fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com unsigned dither = DITHER_VALUE(myx); 1649fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t val = SkDitherRGB32To565(c, dither); 1650fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com if (val != dst[i]) { 1651fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x\n", 1652fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com c, dither, val, dst[i], dstart[i]); 1653fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1654fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com DITHER_INC_X(myx); 1655fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1656fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1657a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1658a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1659fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst += UNROLL; 1660688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // we don't need to increment src as the asm above has already done it 1661fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
count -= UNROLL; 1662688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org x += UNROLL; // probably superfluous 1663fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1664a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1665fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1666a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1667688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // residuals 1668a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1669a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1670a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1671a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1672a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1673a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(SkGetPackedA32(c) == 255); 1674a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1675a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1676a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst++ = SkDitherRGB32To565(c, dither); 1677a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1678a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1679a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1680a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1681a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1682a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, 1683a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor color) { 1684a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count <= 0) { 1685a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com return; 1686a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 
1687a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1688a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (0 == color) { 1689a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (src != dst) { 1690a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com memcpy(dst, src, count * sizeof(SkPMColor)); 1691a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1692a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com return; 1693a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1694a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1695a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned colorA = SkGetPackedA32(color); 1696a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (255 == colorA) { 1697a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sk_memset32(dst, color, count); 16985376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org return; 16995376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } 1700a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 17015376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org unsigned scale = 256 - SkAlpha255To256(colorA); 1702a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 17035376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org if (count >= 8) { 17045376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x4_t vcolor; 17055376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint8x8_t vscale; 17065376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17075376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vcolor = vdupq_n_u32(color); 17085376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17095376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // scale numerical interval [0-255], so load as 8 bits 17105376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vscale = vdup_n_u8(scale); 
17115376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17125376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org do { 17135376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // load src color, 8 pixels, 4 64 bit registers 17145376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // (and increment src). 17155376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x2x4_t vsrc; 17163a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) 17175376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org asm ( 17185376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org "vld1.32 %h[vsrc], [%[src]]!" 17195376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+r" (src) 17205376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : : 17215376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org ); 1722866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang 17235376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[0] = vld1_u32(src); 17245376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[1] = vld1_u32(src+2); 17255376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[2] = vld1_u32(src+4); 17265376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[3] = vld1_u32(src+6); 17275376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org src += 8; 17285376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif 17295376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17305376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // multiply long by scale, 64 bits at a time, 17315376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // destination into a 128 bit register. 
17325376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint16x8x4_t vtmp; 17335376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[0] = vmull_u8(vreinterpret_u8_u32(vsrc.val[0]), vscale); 17345376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[1] = vmull_u8(vreinterpret_u8_u32(vsrc.val[1]), vscale); 17355376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[2] = vmull_u8(vreinterpret_u8_u32(vsrc.val[2]), vscale); 17365376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[3] = vmull_u8(vreinterpret_u8_u32(vsrc.val[3]), vscale); 17375376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17385376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // shift the 128 bit registers, containing the 16 17395376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // bit scaled values back to 8 bits, narrowing the 17405376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // results to 64 bit registers. 17415376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint8x16x2_t vres; 17425376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vres.val[0] = vcombine_u8( 17435376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[0], 8), 17445376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[1], 8)); 17455376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vres.val[1] = vcombine_u8( 17465376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[2], 8), 17475376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[3], 8)); 17485376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17495376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // adding back the color, using 128 bit registers. 
17505376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x4x2_t vdst; 17515376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vdst.val[0] = vreinterpretq_u32_u8(vres.val[0] + 17525376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vreinterpretq_u8_u32(vcolor)); 17535376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vdst.val[1] = vreinterpretq_u32_u8(vres.val[1] + 17545376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vreinterpretq_u8_u32(vcolor)); 17555376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17565376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // store back the 8 calculated pixels (2 128 bit 17575376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // registers), and increment dst. 17583a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) 17595376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org asm ( 17605376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org "vst1.32 %h[vdst], [%[dst]]!" 
17615376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [dst] "+r" (dst) 17625376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [vdst] "w" (vdst) 17635376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : "memory" 17645376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org ); 1765866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang 17665376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vst1q_u32(dst, vdst.val[0]); 17675376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vst1q_u32(dst+4, vdst.val[1]); 17685376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org dst += 8; 17695376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif 17705376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org count -= 8; 17715376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17725376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } while (count >= 8); 17735376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } 17745376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 17755376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org while (count > 0) { 17765376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org *dst = color + SkAlphaMulQ(*src, scale); 17775376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org src += 1; 17785376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org dst += 1; 17795376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org count--; 1780a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1781a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1782a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1783a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1784a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1785a7f11918d92621507f35b228a290f05dcaf0f4b6reedconst SkBlitRow::Proc16 sk_blitrow_platform_565_procs_arm_neon[] = { 1786a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // no dither 17870060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org S32_D565_Opaque_neon, 178895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org S32_D565_Blend_neon, 1789a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_neon, 1790fa115bd4543631244f3b9accb3541b28f4222a96mtklein#if 0 1791a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Blend_neon, 1792fa115bd4543631244f3b9accb3541b28f4222a96mtklein#else 1793b1629c5d9eb6236429cca1502d3bf5fcda8e3406kui.zheng NULL, // https://code.google.com/p/skia/issues/detail?id=2797 17945b2c2c6fd09752641b14766678d62fe50b4e3ef3reed#endif 1795a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1796a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither 1797a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Opaque_Dither_neon, 1798a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Blend_Dither_neon, 1799a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_Dither_neon, 1800a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com NULL, // S32A_D565_Blend_Dither 1801a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1802a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1803402448d6818cab9d7b7633a0c18fcf574c915357mleeconst SkBlitRow::ColorProc16 sk_blitrow_platform_565_colorprocs_arm_neon[] = { 1804402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32_D565, 1805402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32A_D565, 1806402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32_D565_Dither, 1807402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32A_D565_Dither 1808402448d6818cab9d7b7633a0c18fcf574c915357mlee}; 
1809402448d6818cab9d7b7633a0c18fcf574c915357mlee 1810a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { 1811a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com NULL, // S32_Opaque, 1812fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com S32_Blend_BlitRow32_neon, // S32_Blend, 1813c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* 1814c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * We have two choices for S32A_Opaque procs. The one reads the src alpha 1815c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * value and attempts to optimize accordingly. The optimization is 1816c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * sensitive to the source content and is not a win in all cases. For 1817c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * example, if there are a lot of transitions between the alpha states, 1818c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * the performance will almost certainly be worse. However, for many 1819c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * common cases the performance is equivalent or better than the standard 1820c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * case where we do not inspect the src alpha. 
1821c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com */ 1822c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#if SK_A32_SHIFT == 24 1823c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor 1824c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1825c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#else 1826c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1827c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#endif 18283a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 18291fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org S32A_Blend_BlitRow32_neon // S32A_Blend 1830866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else 1831866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit NULL 1832866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#endif 1833a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1834