1a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* 2a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Copyright 2012 The Android Open Source Project 3a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * 4a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Use of this source code is governed by a BSD-style license that can be 5a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * found in the LICENSE file. 6a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 7a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 8a111e492b84c312a6bd5d5d9ef100dca48f4941ddjsollen@google.com#include "SkBlitRow_opts_arm_neon.h" 9a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 10a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitMask.h" 11a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitRow.h" 12a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkColorPriv.h" 13a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkDither.h" 14a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkMathPriv.h" 15a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkUtils.h" 16a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 170060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org#include "SkColor_opts_neon.h" 18a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include <arm_neon.h> 19a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 20ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 21ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_3(const SkPMColor* SK_RESTRICT & src) { 22ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 23ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2; 24ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 25ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 26ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 27ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 28ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 29ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 30ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 31ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [src] "+&r" (src) 32ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 33ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 34ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 35ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 36ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 37ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 38ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 39ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 40ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 41ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 42ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_4(const SkPMColor* SK_RESTRICT & src) { 43ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 44ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3; 45ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 46ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 47ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 48ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 49ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 50ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 51ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc3].8b, v3.8b \t\n" 52ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 53ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3), 54ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [src] "+&r" (src) 55ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 56ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 57ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 58ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 59ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 60ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 61ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = vsrc_3; 62ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 63ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 64ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 65ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 66ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 670060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.orgvoid S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 680060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 690060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 700060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkASSERT(255 == alpha); 710060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 720060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count >= 8) { 730060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint8x8x4_t vsrc; 740060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint16x8_t vdst; 750060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 760060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Load 77ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 78ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 79ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 800060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vsrc = vld4_u8((uint8_t*)src); 81ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit src += 8; 82ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 830060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 840060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Convert src to 565 85bc25dfc798fff225ce65355ecda19d2b85bd0e74commit-bot@chromium.org vdst = SkPixel32ToPixel16_neon8(vsrc); 860060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 870060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Store 880060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vst1q_u16(dst, vdst); 890060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 900060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Prepare next iteration 910060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst += 8; 920060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count -= 8; 930060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 940060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 950060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Leftovers 960060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count > 0) { 970060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColor c = *src++; 980060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColorAssert(c); 990060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org *dst = SkPixel32ToPixel16_ToU16(c); 1000060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst++; 1010060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count--; 1020060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 1030060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org} 1040060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 10595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.orgvoid S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 10695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 10795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 10895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkASSERT(255 > alpha); 10995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vmask_blue, vscale; 11195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // prepare constants 11395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vscale = vdupq_n_u16(SkAlpha255To256(alpha)); 11495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vmask_blue = vmovq_n_u16(0x1F); 11595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org while (count >= 8) { 117ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 11895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 11995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vres_r, vres_g, vres_b; 12095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 12195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load src 122ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 123ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 124ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 12595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org { 12695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 12795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 12895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 12995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 13095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 13195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org asm ( 13295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org "vld4.8 {d0-d3},[%[src]]!" 13395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 13495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : 13595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org ); 136ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 137ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 13995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 140ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 14195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 14295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load and unpack dst 14395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst = vld1q_u16(dst); 14495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes 14595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_b = vandq_u16(vdst, vmask_blue); // extract blue 14695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, 6+5); // extract red 14795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green 14895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 149ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit // Shift src to 565 range 150ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 3); 151ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 2); 152ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 3); 15395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Scale src - dst 155ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_r = vmovl_u8(vsrc.val[NEON_R]) - vdst_r; 156ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_g = vmovl_u8(vsrc.val[NEON_G]) - vdst_g; 157ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_b = vmovl_u8(vsrc.val[NEON_B]) - vdst_b; 15895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r = vshrq_n_u16(vres_r * vscale, 8); 16095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g = vshrq_n_u16(vres_g * vscale, 8); 16195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vshrq_n_u16(vres_b * vscale, 8); 16295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r += vdst_r; 16495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g += vdst_g; 16595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b += vdst_b; 16695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Combine 16895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_g, 5); // insert green into blue 16995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue 17095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 17195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Store 17295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vst1q_u16(dst, vres_b); 17395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org dst += 8; 17495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org count -= 8; 17595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 17695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org if (count > 0) { 17795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org int scale = SkAlpha255To256(alpha); 17895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org do { 17995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColor c = *src++; 18095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColorAssert(c); 18195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16_t d = *dst; 18295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org *dst++ = SkPackRGB16( 18395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale), 18495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale), 18595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale)); 18695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } while (--count != 0); 18795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 18895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org} 18995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32 191a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 192a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 193a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 194a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 195a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 196a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= 8) { 197efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 199a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 200a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "ands ip, %[count], #7 \n\t" 201a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 202a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {q12}, [%[dst]] \n\t" 203a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld4.8 {d0-d3}, [%[src]] \n\t" 204a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // Thumb does not support the standard ARM conditional 205a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // instructions but instead requires the 'it' instruction 206a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // to signal conditional execution 207a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "it eq \n\t" 208a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "moveq ip, #8 \n\t" 209a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 210fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 211a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "add %[src], %[src], ip, LSL#2 \n\t" 212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "add %[dst], %[dst], ip, LSL#1 \n\t" 213a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "subs %[count], %[count], ip \n\t" 214a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "b 9f \n\t" 215a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // LOOP 216a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "2: \n\t" 217fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 218a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {q12}, [%[dst]]! \n\t" 219a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld4.8 {d0-d3}, [%[src]]! \n\t" 220a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {q10}, [%[keep_dst]] \n\t" 221a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "sub %[keep_dst], %[dst], #8*2 \n\t" 222a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "subs %[count], %[count], #8 \n\t" 223a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "9: \n\t" 224a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "pld [%[dst],#32] \n\t" 225a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 226a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 227a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 228a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 229a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 230a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 231a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 232a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 233a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 234fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 235a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 236a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q13, d31 \n\t" 237a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 239a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 240a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 241a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 242a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 \n\t" 243a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 244a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 245a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 246a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 247a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 248a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 249a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d6, d6, d0 \n\t" // moved up 250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 251a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 252a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 253fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 254a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 255a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 256fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 257a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 258a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 259a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 260a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 261a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 262a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 263fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 264a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "bne 2b \n\t" 265fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 266a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "1: \n\t" 267a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {q10}, [%[keep_dst]] \n\t" 268a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 269fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 270a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 271a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 272a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 273a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 274a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 275fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com else 276a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com { // handle count < 8 277efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 278fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 279a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 280a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 281a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 282fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 283a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 284a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 14f \n\t" 285a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d25}, [%[dst]]! \n\t" 286a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {q1}, [%[src]]! \n\t" 287fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 288a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "14: \n\t" 289a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 290a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 12f \n\t" 291a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d24[1]}, [%[dst]]! \n\t" 292a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d1}, [%[src]]! \n\t" 293fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 294a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "12: \n\t" 295a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 296a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 11f \n\t" 297a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d24[1]}, [%[dst]]! \n\t" 298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d0[1]}, [%[src]]! \n\t" 299fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 300a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "11: \n\t" 301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // unzips achieve the same as a vld4 operation 302a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzpq.u16 q0, q1 \n\t" 303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d0, d1 \n\t" 304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d2, d3 \n\t" 305a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 308a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 310a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 314fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 316a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q13, d31 \n\t" 317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 318fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 320a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 \n\t" 323a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 324a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d6, d6, d0 \n\t" // moved up 330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 333fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 336fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 338a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 339a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 340a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 343fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 344a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // store 345a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 346a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 24f \n\t" 347a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d21}, [%[keep_dst]]! \n\t" 348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 349a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "24: \n\t" 350a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 351a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 22f \n\t" 352a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.32 {d20[1]}, [%[keep_dst]]! \n\t" 353fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 354a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "22: \n\t" 355a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 356a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 21f \n\t" 357a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d20[1]}, [%[keep_dst]]! \n\t" 358fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 359a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "21: \n\t" 360a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 361a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 362a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 363a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 364a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 365a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 366a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 367a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 368ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 369a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 370be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.orgstatic inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { 371be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vdupq_n_u16(128); 372be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vshrq_n_u16(prod, 8); 373be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org return vshrq_n_u16(prod, 8); 374be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org} 375be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 376a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 377a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 378a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 379be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkASSERT(255 > alpha); 380a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 381be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org /* This code implements a Neon version of S32A_D565_Blend. The results have 382be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * a few mismatches compared to the original code. These mismatches never 383be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * exceed 1. 384a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 385fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 386be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (count >= 8) { 387be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t valpha_max, vmask_blue; 388be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8_t valpha; 389be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 390be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare constants 391be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha_max = vmovq_n_u16(255); 392be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha = vdup_n_u8(alpha); 393be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vmask_blue = vmovq_n_u16(SK_B16_MASK); 394be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 395be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org do { 396be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 397be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vres_a, vres_r, vres_g, vres_b; 398be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8x4_t vsrc; 399be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 400be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // load pixels 401be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst = vld1q_u16(dst); 402ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 403ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 404ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 405be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) 406be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm ( 407be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 %h[vsrc], [%[src]]!" 408be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+&r" (src) 409be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 410be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 411a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#else 412be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 413be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 414be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 415be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 416be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 417be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm volatile ( 418be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 {d0-d3},[%[src]]!;" 419be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), 420be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org [src] "+&r" (src) 421be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 422be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 423be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[0] = d0; 424be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[1] = d1; 425be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[2] = d2; 426be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[3] = d3; 427a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 428ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM64 429fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 430fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 431be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // deinterleave dst 432be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to top of lanes 433be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_b = vdst & vmask_blue; // extract blue 434be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red 435be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green 436be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 437be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // shift src to 565 438be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); 439be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 8 - SK_G16_BITS); 440be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 8 - SK_B16_BITS); 441be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 442be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // calc src * src_scale 443be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = vmull_u8(vsrc.val[NEON_A], valpha); 444be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmull_u8(vsrc.val[NEON_R], valpha); 445be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmull_u8(vsrc.val[NEON_G], valpha); 446be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmull_u8(vsrc.val[NEON_B], valpha); 447be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 448be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare dst_scale 449be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = SkDiv255Round_neon8(vres_a); 450be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = valpha_max - vres_a; // 255 - (sa * src_scale) / 255 451be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 452be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // add dst * dst_scale to previous result 453be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmlaq_u16(vres_r, vdst_r, vres_a); 454be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmlaq_u16(vres_g, vdst_g, vres_a); 455be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmlaq_u16(vres_b, vdst_b, vres_a); 456be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 457be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#ifdef S32A_D565_BLEND_EXACT 458be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // It is possible to get exact results with this but it is slow, 459be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // even slower than C code in some cases 460be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = SkDiv255Round_neon8(vres_r); 461be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = SkDiv255Round_neon8(vres_g); 462be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = SkDiv255Round_neon8(vres_b); 463be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#else 464be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vrshrq_n_u16(vres_r, 8); 465be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vrshrq_n_u16(vres_g, 8); 466be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vrshrq_n_u16(vres_b, 8); 467be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#endif 468be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // pack result 469be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_g, SK_G16_SHIFT); // insert green into blue 470be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, SK_R16_SHIFT); // insert red into green/blue 471be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 472be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // store 473be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vst1q_u16(dst, vres_b); 474be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 8; 475be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org count -= 8; 476be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } while (count >= 8); 477be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 478a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 479be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // leftovers 480be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org while (count-- > 0) { 481be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkPMColor sc = *src++; 482be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (sc) { 483be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16_t dc = *dst; 484be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); 485be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) + SkMulS16(SkGetPackedR16(dc), dst_scale); 486be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) + SkMulS16(SkGetPackedG16(dc), dst_scale); 487be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) + SkMulS16(SkGetPackedB16(dc), dst_scale); 488be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); 489be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 490be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 1; 491a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 492a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 493a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 494a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* dither matrix for Neon, derived from gDitherMatrix_3Bit_16. 495a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * each dither value is spaced out into byte lanes, and repeated 496a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * to allow an 8-byte load from offsets 0, 1, 2 or 3 from the 497a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * start of each row. 498a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 499a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic const uint8_t gDitherMatrix_Neon[48] = { 500a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 501a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 502a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 503a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 504fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 505a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 506a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 507a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src, 508a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) 509a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 510fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 5114cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkASSERT(255 > alpha); 5124cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5134cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // rescale alpha to range 1 - 256 514a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int scale = SkAlpha255To256(alpha); 515fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 5164cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count >= 8) { 5174cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org /* select row and offset for dither array */ 5184cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 519fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 5204cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither = vld1_u8(dstart); // load dither values 5214cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. green dither values 522fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 5234cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vscale = vdupq_n_s16(scale); // duplicate scale into neon reg 5244cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vmask_b = vdupq_n_u16(0x1F); // set up blue mask 525fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 5264cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 5274cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 528ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 5294cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc_r, vsrc_g, vsrc_b; 5304cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b; 5314cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b; 5324cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b; 5334cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst; 5344cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst_r, vdst_g, vdst_b; 5354cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vres_r, vres_g, vres_b; 5364cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int8x8_t vres8_r, vres8_g, vres8_b; 5374cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5384cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load source and add dither 539ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 540ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 541ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 5424cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org { 5434cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d0 asm("d0"); 5444cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d1 asm("d1"); 5454cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d2 asm("d2"); 5464cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d3 asm("d3"); 5474cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5484cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org asm ( 549ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! " 5504cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 5514cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : 5524cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org ); 553ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 554ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 555ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 5564cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 557ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 558ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_r = vsrc.val[NEON_R]; 559ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_g = vsrc.val[NEON_G]; 560ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_b = vsrc.val[NEON_B]; 5614cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5624cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6 5634cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5 5644cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. blue >> 5 5654cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5664cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen 5674cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vaddl_u8(vsrc_r, vdither); // add in dither to red and widen 5684cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vaddl_u8(vsrc_b, vdither); // add in dither to blue and widen 5694cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5704cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r); // sub shifted red from result 5714cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vsubw_u8(vsrc_dit_g, vsrc565_g); // sub shifted green from result 5724cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vsubw_u8(vsrc_dit_b, vsrc565_b); // sub shifted blue from result 5734cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5744cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_r = vshrq_n_u16(vsrc_dit_r, 3); 5754cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_g = vshrq_n_u16(vsrc_dit_g, 2); 5764cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_b = vshrq_n_u16(vsrc_dit_b, 3); 5774cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5784cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load dst and unpack 5794cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst = vld1q_u16(dst); 5804cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_g = vshrq_n_u16(vdst, 5); // shift down to get green 5814cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_r = vshrq_n_u16(vshlq_n_u16(vdst, 5), 5+5); // double shift to extract red 5824cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_b = vandq_u16(vdst, vmask_b); // mask to get blue 5834cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5844cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // subtract dst from src and widen 5854cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_r), vreinterpretq_s16_u16(vdst_r)); 5864cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_g), vreinterpretq_s16_u16(vdst_g)); 5874cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_b), vreinterpretq_s16_u16(vdst_b)); 5884cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5894cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // multiply diffs by scale and shift 5904cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vmulq_s16(vres_r, vscale); 5914cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vmulq_s16(vres_g, vscale); 5924cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vmulq_s16(vres_b, vscale); 5934cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5944cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_r = vshrn_n_s16(vres_r, 8); 5954cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_g = vshrn_n_s16(vres_g, 8); 5964cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_b = vshrn_n_s16(vres_b, 8); 5974cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 5984cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // add dst to result 5994cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vaddw_s8(vreinterpretq_s16_u16(vdst_r), vres8_r); 6004cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vaddw_s8(vreinterpretq_s16_u16(vdst_g), vres8_g); 6014cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vaddw_s8(vreinterpretq_s16_u16(vdst_b), vres8_b); 6024cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6034cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // put result into 565 format 6044cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_g, 5); // shift up green and insert into blue 6054cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_r, 6+5); // shift up red and insert into blue 6064cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6074cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Store result 6084cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vst1q_u16(dst, vreinterpretq_u16_s16(vres_b)); 6094cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6104cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Next iteration 6114cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org dst += 8; 6124cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org count -= 8; 6134cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6144cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (count >= 8); 6154cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 6164cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6174cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Leftovers 6184cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count > 0) { 6194cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int scale = SkAlpha255To256(alpha); 6204cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_565_SCAN(y); 6214cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 6224cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColor c = *src++; 6234cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColorAssert(c); 6244cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6254cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int dither = DITHER_VALUE(x); 6264cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sr = SkGetPackedR32(c); 6274cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sg = SkGetPackedG32(c); 6284cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sb = SkGetPackedB32(c); 6294cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sr = SkDITHER_R32To565(sr, dither); 6304cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sg = SkDITHER_G32To565(sg, dither); 6314cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sb = SkDITHER_B32To565(sb, dither); 6324cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6334cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16_t d = *dst; 6344cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale), 6354cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sg, SkGetPackedG16(d), scale), 6364cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sb, SkGetPackedB16(d), scale)); 6374cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_INC_X(x); 6384cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (--count != 0); 639a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 640a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 641a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 642a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 643a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 644a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha) { 645a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 646a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 647a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 648a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 649a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 650fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t alpha_mask; 651a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 652fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 653fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com alpha_mask = vld1_u8(alpha_mask_setup); 654a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 655fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* do the NEON unrolled code */ 656fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 4 657fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com while (count >= UNROLL) { 658fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t src_raw, dst_raw, dst_final; 659fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t src_raw_2, dst_raw_2, dst_final_2; 660a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 6610a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org /* The two prefetches below may make the code slighlty 6620a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org * slower for small values of count but are worth having 6630a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org * in the general case. 6640a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org */ 6650a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org __builtin_prefetch(src+32); 6660a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org __builtin_prefetch(dst+32); 6670a5699ee482c3b5ef1e857de8a2de06c6a1fa298commit-bot@chromium.org 668fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* get the source */ 669fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com src_raw = vreinterpret_u8_u32(vld1_u32(src)); 670fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if UNROLL > 2 671fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); 672a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 673a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 674fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* get and hold the dst too */ 675fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); 676fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if UNROLL > 2 677fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); 678a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 679a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 680fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* 1st and 2nd bits of the unrolling */ 681fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 682fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t dst_cooked; 683fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst_wide; 684fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t alpha_narrow; 685fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t alpha_wide; 686a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 687fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* get the alphas spread out properly */ 688fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com alpha_narrow = vtbl1_u8(src_raw, alpha_mask); 689fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); 690a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 691fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* spread the dest */ 692fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_wide = vmovl_u8(dst_raw); 693a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 694fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* alpha mul the dest */ 695fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_wide = vmulq_u16 (dst_wide, alpha_wide); 696fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_cooked = vshrn_n_u16(dst_wide, 8); 697a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 698fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* sum -- ignoring any byte lane overflows */ 699fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_final = vadd_u8(src_raw, dst_cooked); 700fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 701a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 702fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if UNROLL > 2 703fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* the 3rd and 4th bits of our unrolling */ 704fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 705fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t dst_cooked; 706fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst_wide; 707fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t alpha_narrow; 708fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t alpha_wide; 709a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 710fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); 711fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); 712a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 713fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* spread the dest */ 714fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_wide = vmovl_u8(dst_raw_2); 715a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 716fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* alpha mul the dest */ 717fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_wide = vmulq_u16 (dst_wide, alpha_wide); 718fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_cooked = vshrn_n_u16(dst_wide, 8); 719a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 720fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* sum -- ignoring any byte lane overflows */ 721fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_final_2 = vadd_u8(src_raw_2, dst_cooked); 722fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 723a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 724a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 725fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1_u32(dst, vreinterpret_u32_u8(dst_final)); 726fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if UNROLL > 2 727fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2)); 728a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 729a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 730fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com src += UNROLL; 731fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst += UNROLL; 732fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 733fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 734fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 735a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 736fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* do any residual iterations */ 737a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com while (--count >= 0) { 738a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst = SkPMSrcOver(*src, *dst); 739a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com src += 1; 740a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst += 1; 741a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 742a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 743a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 744a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 745c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.comvoid S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst, 746c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com const SkPMColor* SK_RESTRICT src, 747c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com int count, U8CPU alpha) { 748c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com SkASSERT(255 == alpha); 749c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 750c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if (count <= 0) 751c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com return; 752c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 753c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* Use these to check if src is transparent or opaque */ 754c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com const unsigned int ALPHA_OPAQ = 0xFF000000; 755c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com const unsigned int ALPHA_TRANS = 0x00FFFFFF; 756c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 757c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#define UNROLL 4 758c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com const SkPMColor* SK_RESTRICT src_end = src + count - (UNROLL + 1); 759c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com const SkPMColor* SK_RESTRICT src_temp = src; 760c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 761c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* set up the NEON variables */ 762c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint8x8_t alpha_mask; 763c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 764c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com alpha_mask = vld1_u8(alpha_mask_setup); 765c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 766c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint8x8_t src_raw, dst_raw, dst_final; 767c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint8x8_t src_raw_2, dst_raw_2, dst_final_2; 768c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint8x8_t dst_cooked; 769c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint16x8_t dst_wide; 770c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint8x8_t alpha_narrow; 771c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com uint16x8_t alpha_wide; 772c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 773c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* choose the first processing type */ 774c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if( src >= src_end) 775c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto TAIL; 776c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src <= ALPHA_TRANS) 777c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_0; 778c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src >= ALPHA_OPAQ) 779c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_255; 780c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* fall-thru */ 781c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 782c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.comALPHA_1_TO_254: 783c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com do { 784c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 785c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* get the source */ 786c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src_raw = vreinterpret_u8_u32(vld1_u32(src)); 787c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); 788c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 789c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* get and hold the dst too */ 790c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); 791c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); 792c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 793c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 794c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* get the alphas spread out properly */ 795c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com alpha_narrow = vtbl1_u8(src_raw, alpha_mask); 796c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ 797c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* we collapsed (255-a)+1 ... */ 798c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); 799c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 800c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* spread the dest */ 801c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_wide = vmovl_u8(dst_raw); 802c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 803c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* alpha mul the dest */ 804c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_wide = vmulq_u16 (dst_wide, alpha_wide); 805c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_cooked = vshrn_n_u16(dst_wide, 8); 806c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 807c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* sum -- ignoring any byte lane overflows */ 808c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_final = vadd_u8(src_raw, dst_cooked); 809c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 810c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); 811c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ 812c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* we collapsed (255-a)+1 ... */ 813c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); 814c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 815c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* spread the dest */ 816c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_wide = vmovl_u8(dst_raw_2); 817c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 818c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* alpha mul the dest */ 819c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_wide = vmulq_u16 (dst_wide, alpha_wide); 820c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_cooked = vshrn_n_u16(dst_wide, 8); 821c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 822c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* sum -- ignoring any byte lane overflows */ 823c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst_final_2 = vadd_u8(src_raw_2, dst_cooked); 824c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 825c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com vst1_u32(dst, vreinterpret_u32_u8(dst_final)); 826c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2)); 827c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 828c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src += UNROLL; 829c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst += UNROLL; 830c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 831c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* if 2 of the next pixels aren't between 1 and 254 832c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com it might make sense to go to the optimized loops */ 833c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) || (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)) 834c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com break; 835c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 836c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } while(src < src_end); 837c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 838c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if (src >= src_end) 839c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto TAIL; 840c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 841c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ) 842c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_255; 843c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 844c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /*fall-thru*/ 845c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 846c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.comALPHA_0: 847c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 848c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /*In this state, we know the current alpha is 0 and 849c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com we optimize for the next alpha also being zero. */ 850c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src_temp = src; //so we don't have to increment dst every time 851c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com do { 852c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*(++src) > ALPHA_TRANS) 853c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com break; 854c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*(++src) > ALPHA_TRANS) 855c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com break; 856c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*(++src) > ALPHA_TRANS) 857c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com break; 858c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*(++src) > ALPHA_TRANS) 859c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com break; 860c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } while(src < src_end); 861c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 862c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst += (src - src_temp); 863c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 864c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* no longer alpha 0, so determine where to go next. */ 865c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if( src >= src_end) 866c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto TAIL; 867c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src >= ALPHA_OPAQ) 868c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_255; 869c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com else 870c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_1_TO_254; 871c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 872c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.comALPHA_255: 873c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com while((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) { 874c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst[0]=src[0]; 875c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst[1]=src[1]; 876c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst[2]=src[2]; 877c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst[3]=src[3]; 878c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src+=UNROLL; 879c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst+=UNROLL; 880c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(src >= src_end) 881c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto TAIL; 882c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 883c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 884c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com //Handle remainder. 885c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src >= ALPHA_OPAQ) { *dst++ = *src++; 886c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src >= ALPHA_OPAQ) { *dst++ = *src++; 887c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src >= ALPHA_OPAQ) { *dst++ = *src++; } 888c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 889c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 890c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 891c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if( src >= src_end) 892c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto TAIL; 893c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if(*src <= ALPHA_TRANS) 894c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_0; 895c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com else 896c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com goto ALPHA_1_TO_254; 897c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 898c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.comTAIL: 899c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* do any residual iterations */ 900c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src_end += UNROLL + 1; //goto the real end 901c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com while(src != src_end) { 902c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if( *src != 0 ) { 903c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com if( *src >= ALPHA_OPAQ ) { 904c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com *dst = *src; 905c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 906c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com else { 907c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com *dst = SkPMSrcOver(*src, *dst); 908c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 909c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 910c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com src++; 911c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com dst++; 912c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com } 913c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com 914c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#undef UNROLL 915c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com return; 916c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com} 917a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 918a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* Neon version of S32_Blend_BlitRow32() 919a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * portable version is in src/core/SkBlitRow_D32.cpp 920a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 921a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 922a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 923a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha) { 924a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(alpha <= 255); 925fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 926374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org if (count <= 0) { 927374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org return; 928374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org } 929dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 930374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16_t src_scale = SkAlpha255To256(alpha); 931374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16_t dst_scale = 256 - src_scale; 932dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 933374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org while (count >= 2) { 934374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint8x8_t vsrc, vdst, vres; 935374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16x8_t vsrc_wide, vdst_wide; 936dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 937374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org /* These commented prefetches are a big win for count 938374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org * values > 64 on an A9 (Pandaboard) but hurt by 10% for count = 4. 939374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org * They also hurt a little (<5%) on an A15 940374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org */ 941374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org //__builtin_prefetch(src+32); 942374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org //__builtin_prefetch(dst+32); 943dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 944374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Load 945374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 946374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 947374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 948374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process src 949374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 950374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); 951374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 952374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process dst 953374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); 954374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 955374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Combine 956374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 957374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 958374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Store 959374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 960374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 961374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org src += 2; 962374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org dst += 2; 963374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org count -= 2; 964fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 965fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 966fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com if (count == 1) { 967374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 968374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16x8_t vsrc_wide, vdst_wide; 969dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 970374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Load 971374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 972374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 973374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 974374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process 975374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 976374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); 977374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); 978374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 979374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 980374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Store 981374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 982a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 983a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 984a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 9853a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 9861fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.orgvoid S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 9871fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, 9881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org int count, U8CPU alpha) { 9891fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9901fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org SkASSERT(255 >= alpha); 9911fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9921fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count <= 0) { 9931fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org return; 9941fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 9951fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9961fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned alpha256 = SkAlpha255To256(alpha); 9971fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9981fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // First deal with odd counts 9991fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count & 1) { 10001fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 10011fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide; 10021fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned dst_scale; 10031fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10041fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 10051fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 10061fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 10071fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10081fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 10091fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = vget_lane_u8(vsrc, 3); 10101fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale *= alpha256; 10111fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale >>= 8; 10121fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = 256 - dst_scale; 10131fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10141fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 10151fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 10161fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); 10171fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10181fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 10191fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 10201fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmulq_n_u16(vdst_wide, dst_scale); 10211fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10221fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 10231fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 10241fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10251fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 10261fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst++; 10271fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src++; 10281fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count--; 10291fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 10301fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10311fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count) { 10321fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t alpha_mask; 10331fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 10341fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org alpha_mask = vld1_u8(alpha_mask_setup); 10351fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10361fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org do { 10371fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10381fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc, vdst, vres, vsrc_alphas; 10391fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; 10401fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10411fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(src+32); 10421fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(dst+32); 10431fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10441fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 10451fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 10461fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 10471fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10481fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Prepare src_scale 10491fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_scale = vdupq_n_u16(alpha256); 10501fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10511fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 10521fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); 10531fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vmovl_u8(vsrc_alphas); 10541fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale *= vsrc_scale; 10551fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vshrq_n_u16(vdst_scale, 8); 10561fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale); 10571fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10581fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 10591fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 10601fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide *= vsrc_scale; 10611fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10621fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 10631fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 10641fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide *= vdst_scale; 10651fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10661fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 10671fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 10681fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10691fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 10701fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10711fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src += 2; 10721fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst += 2; 10731fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count -= 2; 10741fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } while(count); 10751fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 10761fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org} 10771fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 1078a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1079a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1080fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef DEBUG_OPAQUE_DITHER 1081a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1082fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if defined(DEBUG_OPAQUE_DITHER) 1083a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme8(char *str, void *p, int len) 1084a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 1085fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static char buf[256]; 1086fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char tbuf[32]; 1087fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i; 1088fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char *pc = (char*) p; 1089fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(buf,"%8s:", str); 1090fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for(i=0;i<len;i++) { 1091fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(tbuf, " %02x", pc[i]); 1092fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com strcat(buf, tbuf); 1093fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1094fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("%s\n", buf); 1095a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1096a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme16(char *str, void *p, int len) 1097a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 1098fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static char buf[256]; 1099fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char tbuf[32]; 1100fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i; 1101fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t *pc = (uint16_t*) p; 1102fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(buf,"%8s:", str); 1103fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com len = (len / sizeof(uint16_t)); /* passed as bytes */ 1104fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for(i=0;i<len;i++) { 1105fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(tbuf, " %04x", pc[i]); 1106fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com strcat(buf, tbuf); 1107fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1108fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("%s\n", buf); 1109a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1110a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1111ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM32 1112a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1113a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, 1114a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1115a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1116a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1117a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1118fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1119a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1120a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1121fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1122fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1123fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t tmpbuf[UNROLL]; 1124fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int td[UNROLL]; 1125fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int tdv[UNROLL]; 1126fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int ta[UNROLL]; 1127fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int tap[UNROLL]; 1128fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t in_dst[UNROLL]; 1129fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int offset = 0; 1130fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int noisy = 0; 1131a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1132a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1133fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint8x8_t dbase; 1134fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1135fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dbase = vld1_u8(dstart); 1136a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1137a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1139fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t sr, sg, sb, sa, d; 1140fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8, scale8, alpha8; 1141fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst_r, dst_g, dst_b; 1142fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1143fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1144fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // calculate 8 elements worth into a temp buffer 1145fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org { 1146fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int my_y = y; 1147fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int my_x = x; 1148fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkPMColor* my_src = (SkPMColor*)src; 1149fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint16_t* my_dst = dst; 1150fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int i; 1151fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1152fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org DITHER_565_SCAN(my_y); 1153fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for(i = 0; i < UNROLL; i++) { 1154a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *my_src++; 1155a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1156a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1157a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1158fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1159a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(DITHER_VALUE(my_x), SkAlpha255To256(a)); 1160fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tdv[i] = DITHER_VALUE(my_x); 1161fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ta[i] = a; 1162fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tap[i] = SkAlpha255To256(a); 1163fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org td[i] = d; 1164fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1165a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1166a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1167a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1168a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1169a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1170a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1171fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1172a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1173a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*my_dst); 1174a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1175a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1176a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com tmpbuf[i] = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1177fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org td[i] = d; 1178a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } else { 1179fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tmpbuf[i] = *my_dst; 1180fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ta[i] = tdv[i] = td[i] = 0xbeef; 1181fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1182fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org in_dst[i] = *my_dst; 1183a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com my_dst += 1; 1184a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(my_x); 1185fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1186fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1187a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1188a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1189ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 1191ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1192fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1193fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1194fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1195fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1196fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1197a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1198ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ("vld4.8 {d0-d3},[%[src]]! " 1199fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src) 1200fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : 1201fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ); 1202ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1203ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1204ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1205ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = d3; 1206fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1207ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1208ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sa = vsrc.val[NEON_A]; 1209ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1210ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1211ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1213fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org /* calculate 'd', which will be 0..7 1214fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * dbase[] is 0..7; alpha is 0..256; 16 bits suffice 1215fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1216fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmovl_u8(dbase); 1217fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmlal_u8(alpha8, sa, dbase); 1218fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org d = vshrn_n_u16(alpha8, 8); // narrowing too 1219fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1220fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sr = sr - (sr>>5) + d 1221fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* watching for 8-bit overflow. d is 0..7; risky range of 1222fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * sr is >248; and then (sr>>5) is 7 so it offsets 'd'; 1223fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * safe as long as we do ((sr-sr>>5) + d) 1224fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1225fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1226fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vadd_u8(sr, d); 1227fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1228fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sb = sb - (sb>>5) + d 1229fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1230fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vadd_u8(sb, d); 1231fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1232fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1233fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1234fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vadd_u8(sg, vshr_n_u8(d,1)); 1235fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1236fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // need to pick up 8 dst's -- at 16 bits each, 128 bits 1237fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vld1q_u16(dst); 1238fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_b = vandq_u16(dst8, vdupq_n_u16(SK_B16_MASK)); 1239fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_g = vshrq_n_u16(vshlq_n_u16(dst8, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS); 1240fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_r = vshrq_n_u16(dst8, SK_R16_SHIFT); // clearing hi bits 1241fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1242fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // blend 1243fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vsubw_u8(vdupq_n_u16(256), sa); 1244a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1245fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // combine the addq and mul, save 3 insns 1246fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vshrq_n_u16(scale8, 3); 1247fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_b = vmlaq_u16(vshll_n_u8(sb,2), dst_b, scale8); 1248fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8); 1249fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8); 1250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1251fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // repack to store 1252fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst8 = vshrq_n_u16(dst_b, 5); 1253fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5); 1254fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11); 1255fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1256fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1257fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1258fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1259fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // verify my 8 elements match the temp buffer 1260fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org { 1261fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int i, bad=0; 1262fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org static int invocation; 1263a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1264fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for (i = 0; i < UNROLL; i++) { 1265fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org if (tmpbuf[i] != dst[i]) { 1266fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org bad=1; 1267fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1268fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1269fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org if (bad) { 1270fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf("BAD S32A_D565_Opaque_Dither_neon(); invocation %d offset %d\n", 1271fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org invocation, offset); 1272fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf(" alpha 0x%x\n", alpha); 1273fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for (i = 0; i < UNROLL; i++) 1274fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf("%2d: %s %04x w %04x id %04x s %08x d %04x %04x %04x %04x\n", 1275fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org i, ((tmpbuf[i] != dst[i])?"BAD":"got"), dst[i], tmpbuf[i], 1276fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org in_dst[i], src[i-8], td[i], tdv[i], tap[i], ta[i]); 1277fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1278fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("alpha8", &alpha8, sizeof(alpha8)); 1279fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("scale8", &scale8, sizeof(scale8)); 1280fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("d", &d, sizeof(d)); 1281fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst8", &dst8, sizeof(dst8)); 1282fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_b", &dst_b, sizeof(dst_b)); 1283fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_g", &dst_g, sizeof(dst_g)); 1284fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_r", &dst_r, sizeof(dst_r)); 1285fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sb", &sb, sizeof(sb)); 1286fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sg", &sg, sizeof(sg)); 1287fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sr", &sr, sizeof(sr)); 1288fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1289fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org return; 1290fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1291fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org offset += UNROLL; 1292fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org invocation++; 1293fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1294fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#endif 1295fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst += UNROLL; 1296fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 1297fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // skip x += UNROLL, since it's unchanged mod-4 1298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (count >= UNROLL); 1299a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1300fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1302fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // residuals 1303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1305a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1308a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1310fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither and alpha are just temporary variables to work-around 1312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // an ICE in debug. 1313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1314a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned alpha = SkAlpha255To256(a); 1315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(dither, alpha); 1316fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1318a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1320a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1323fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1324a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst += 1; 1331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1333a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1336a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1338fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef DEBUG_S32_OPAQUE_DITHER 1339a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1340a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, 1341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1343a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1344a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1345fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1346a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1347fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t d; 1348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1349fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com d = vld1_u8(dstart); 1350fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1351fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com while (count >= UNROLL) { 1352efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint8x8_t sr, sg, sb; 1353efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16x8_t dr, dg, db; 1354fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8; 1355ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1356fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1357ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1358ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 1359ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1360fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1361fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1362fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1363fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1364fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1365fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1366688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org asm ( 1367ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! " 1368688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 1369688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : 1370688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org ); 1371ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1372ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1373ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1374fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1375ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1376ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1377ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1378ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1379ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 1380fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* XXX: if we want to prefetch, hide it in the above asm() 1381fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * using the gcc __builtin_prefetch(), the prefetch will 1382fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * fall to the bottom of the loop -- it won't stick up 1383fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * at the top of the loop, just after the vld4. 1384fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com */ 1385fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1386688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sr = sr - (sr>>5) + d 1387fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1388fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dr = vaddl_u8(sr, d); 1389fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1390688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sb = sb - (sb>>5) + d 1391fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1392fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com db = vaddl_u8(sb, d); 1393fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1394688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1395fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1396688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dg = vaddl_u8(sg, vshr_n_u8(d, 1)); 1397fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1398688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // pack high bits of each into 565 format (rgb, b is lsb) 1399fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vshrq_n_u16(db, 3); 1400fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5); 1401688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11); 1402fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1403688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // store it 1404fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1405fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1406fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if defined(DEBUG_S32_OPAQUE_DITHER) 1407688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // always good to know if we generated good results 1408fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1409fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i, myx = x, myy = y; 1410fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com DITHER_565_SCAN(myy); 1411fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for (i=0;i<UNROLL;i++) { 1412688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // the '!' in the asm block above post-incremented src by the 8 pixels it reads. 1413688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org SkPMColor c = src[i-8]; 1414fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com unsigned dither = DITHER_VALUE(myx); 1415fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t val = SkDitherRGB32To565(c, dither); 1416fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com if (val != dst[i]) { 1417fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x\n", 1418fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com c, dither, val, dst[i], dstart[i]); 1419fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1420fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com DITHER_INC_X(myx); 1421fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1422fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1423a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1424a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1425fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst += UNROLL; 1426688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // we don't need to increment src as the asm above has already done it 1427fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 1428688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org x += UNROLL; // probably superfluous 1429fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1430a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1431fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1432a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1433688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // residuals 1434a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1435a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1436a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1437a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1438a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1439a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(SkGetPackedA32(c) == 255); 1440a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1441a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1442a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst++ = SkDitherRGB32To565(c, dither); 1443a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1444a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1445a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1446a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1447a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1448a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, 1449a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor color) { 1450a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count <= 0) { 1451a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com return; 1452a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1453a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1454a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (0 == color) { 1455a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (src != dst) { 1456a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com memcpy(dst, src, count * sizeof(SkPMColor)); 1457a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1458a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com return; 1459a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1460a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1461a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned colorA = SkGetPackedA32(color); 1462a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (255 == colorA) { 1463a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sk_memset32(dst, color, count); 14645376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org return; 14655376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } 1466a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 14675376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org unsigned scale = 256 - SkAlpha255To256(colorA); 1468a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 14695376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org if (count >= 8) { 14705376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x4_t vcolor; 14715376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint8x8_t vscale; 14725376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 14735376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vcolor = vdupq_n_u32(color); 14745376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 14755376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // scale numerical interval [0-255], so load as 8 bits 14765376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vscale = vdup_n_u8(scale); 14775376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 14785376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org do { 14795376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // load src color, 8 pixels, 4 64 bit registers 14805376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // (and increment src). 14815376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x2x4_t vsrc; 14823a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) 14835376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org asm ( 14845376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org "vld1.32 %h[vsrc], [%[src]]!" 14855376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+r" (src) 14865376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : : 14875376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org ); 1488866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang 14895376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[0] = vld1_u32(src); 14905376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[1] = vld1_u32(src+2); 14915376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[2] = vld1_u32(src+4); 14925376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[3] = vld1_u32(src+6); 14935376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org src += 8; 14945376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif 14955376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 14965376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // multiply long by scale, 64 bits at a time, 14975376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // destination into a 128 bit register. 14985376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint16x8x4_t vtmp; 14995376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[0] = vmull_u8(vreinterpret_u8_u32(vsrc.val[0]), vscale); 15005376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[1] = vmull_u8(vreinterpret_u8_u32(vsrc.val[1]), vscale); 15015376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[2] = vmull_u8(vreinterpret_u8_u32(vsrc.val[2]), vscale); 15025376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[3] = vmull_u8(vreinterpret_u8_u32(vsrc.val[3]), vscale); 15035376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15045376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // shift the 128 bit registers, containing the 16 15055376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // bit scaled values back to 8 bits, narrowing the 15065376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // results to 64 bit registers. 15075376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint8x16x2_t vres; 15085376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vres.val[0] = vcombine_u8( 15095376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[0], 8), 15105376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[1], 8)); 15115376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vres.val[1] = vcombine_u8( 15125376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[2], 8), 15135376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[3], 8)); 15145376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15155376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // adding back the color, using 128 bit registers. 15165376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x4x2_t vdst; 15175376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vdst.val[0] = vreinterpretq_u32_u8(vres.val[0] + 15185376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vreinterpretq_u8_u32(vcolor)); 15195376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vdst.val[1] = vreinterpretq_u32_u8(vres.val[1] + 15205376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vreinterpretq_u8_u32(vcolor)); 15215376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15225376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // store back the 8 calculated pixels (2 128 bit 15235376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // registers), and increment dst. 15243a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) 15255376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org asm ( 15265376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org "vst1.32 %h[vdst], [%[dst]]!" 15275376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [dst] "+r" (dst) 15285376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [vdst] "w" (vdst) 15295376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : "memory" 15305376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org ); 1531866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang 15325376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vst1q_u32(dst, vdst.val[0]); 15335376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vst1q_u32(dst+4, vdst.val[1]); 15345376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org dst += 8; 15355376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif 15365376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org count -= 8; 15375376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15385376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } while (count >= 8); 15395376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } 15405376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15415376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org while (count > 0) { 15425376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org *dst = color + SkAlphaMulQ(*src, scale); 15435376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org src += 1; 15445376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org dst += 1; 15455376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org count--; 1546a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1547a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1548a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1549a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1550a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1551a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { 1552a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // no dither 15530060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org S32_D565_Opaque_neon, 155495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org S32_D565_Blend_neon, 1555ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32 1556a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_neon, 1557ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1558ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit NULL, 1559ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1560a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Blend_neon, 1561a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1562a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither 1563a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Opaque_Dither_neon, 1564a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Blend_Dither_neon, 1565a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_Dither_neon, 1566a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com NULL, // S32A_D565_Blend_Dither 1567a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1568a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1569a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { 1570a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com NULL, // S32_Opaque, 1571fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com S32_Blend_BlitRow32_neon, // S32_Blend, 1572c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* 1573c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * We have two choices for S32A_Opaque procs. The one reads the src alpha 1574c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * value and attempts to optimize accordingly. The optimization is 1575c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * sensitive to the source content and is not a win in all cases. For 1576c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * example, if there are a lot of transitions between the alpha states, 1577c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * the performance will almost certainly be worse. However, for many 1578c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * common cases the performance is equivalent or better than the standard 1579c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * case where we do not inspect the src alpha. 1580c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com */ 1581c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#if SK_A32_SHIFT == 24 1582c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor 1583c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1584c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#else 1585c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1586c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#endif 15873a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 15881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org S32A_Blend_BlitRow32_neon // S32A_Blend 1589866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else 1590866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit NULL 1591866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#endif 1592a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1593