1a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* 2a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Copyright 2012 The Android Open Source Project 3a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * 4a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Use of this source code is governed by a BSD-style license that can be 5a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * found in the LICENSE file. 6a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 7a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 8a111e492b84c312a6bd5d5d9ef100dca48f4941ddjsollen@google.com#include "SkBlitRow_opts_arm_neon.h" 9a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 10a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitMask.h" 11a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitRow.h" 12a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkColorPriv.h" 13a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkDither.h" 14a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkMathPriv.h" 15a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkUtils.h" 16a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 170060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org#include "SkColor_opts_neon.h" 18a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include <arm_neon.h> 19a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 20ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 21ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_3(const SkPMColor* SK_RESTRICT & src) { 22ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 23ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2; 24ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 25ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 
26ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 27ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 28ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 29ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 30ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 31ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [src] "+&r" (src) 32ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 33ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 34ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 35ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 36ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 37ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 38ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 39ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 40ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 41ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 42ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_4(const SkPMColor* SK_RESTRICT & src) { 43ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 44ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3; 45ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 46ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 47ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 48ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 49ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 50ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 
51ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc3].8b, v3.8b \t\n" 52ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 53ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3), 54ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [src] "+&r" (src) 55ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 56ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 57ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 58ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 59ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 60ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 61ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = vsrc_3; 62ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 63ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 64ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 65ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 66ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 670060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.orgvoid S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 680060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 690060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 700060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkASSERT(255 == alpha); 710060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 720060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count >= 8) { 730060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint8x8x4_t vsrc; 740060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint16x8_t vdst; 750060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 
760060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Load 77ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 78ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 79ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 800060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vsrc = vld4_u8((uint8_t*)src); 81ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit src += 8; 82ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 830060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 840060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Convert src to 565 85bc25dfc798fff225ce65355ecda19d2b85bd0e74commit-bot@chromium.org vdst = SkPixel32ToPixel16_neon8(vsrc); 860060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 870060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Store 880060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vst1q_u16(dst, vdst); 890060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 900060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Prepare next iteration 910060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst += 8; 920060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count -= 8; 930060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 940060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 950060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Leftovers 960060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count > 0) { 970060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColor c = *src++; 980060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColorAssert(c); 990060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org *dst = SkPixel32ToPixel16_ToU16(c); 1000060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst++; 
1010060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count--; 1020060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 1030060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org} 1040060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 10595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.orgvoid S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 10695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 10795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 10895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkASSERT(255 > alpha); 10995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vmask_blue, vscale; 11195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // prepare constants 11395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vscale = vdupq_n_u16(SkAlpha255To256(alpha)); 11495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vmask_blue = vmovq_n_u16(0x1F); 11595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org while (count >= 8) { 117ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 11895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 11995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vres_r, vres_g, vres_b; 12095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 12195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load src 122ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 123ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 
124ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 12595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org { 12695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 12795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 12895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 12995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 13095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 13195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org asm ( 13295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org "vld4.8 {d0-d3},[%[src]]!" 13395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 13495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : 13595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org ); 136ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 137ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 13995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 140ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 14195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 14295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load and unpack dst 14395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst = vld1q_u16(dst); 14495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes 14595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_b = vandq_u16(vdst, vmask_blue); // extract blue 14695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, 6+5); // extract red 
14795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green 14895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 149ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit // Shift src to 565 range 150ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 3); 151ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 2); 152ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 3); 15395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Scale src - dst 155ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_r = vmovl_u8(vsrc.val[NEON_R]) - vdst_r; 156ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_g = vmovl_u8(vsrc.val[NEON_G]) - vdst_g; 157ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_b = vmovl_u8(vsrc.val[NEON_B]) - vdst_b; 15895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r = vshrq_n_u16(vres_r * vscale, 8); 16095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g = vshrq_n_u16(vres_g * vscale, 8); 16195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vshrq_n_u16(vres_b * vscale, 8); 16295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r += vdst_r; 16495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g += vdst_g; 16595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b += vdst_b; 16695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Combine 16895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, 
vres_g, 5); // insert green into blue 16995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue 17095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 17195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Store 17295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vst1q_u16(dst, vres_b); 17395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org dst += 8; 17495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org count -= 8; 17595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 17695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org if (count > 0) { 17795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org int scale = SkAlpha255To256(alpha); 17895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org do { 17995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColor c = *src++; 18095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColorAssert(c); 18195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16_t d = *dst; 18295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org *dst++ = SkPackRGB16( 18395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale), 18495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale), 18595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale)); 18695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } while (--count != 0); 18795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 18895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org} 18995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32 
191a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 192a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 193a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 194a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 195a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 196a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= 8) { 197efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 199a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 200a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "ands ip, %[count], #7 \n\t" 201a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 202a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {q12}, [%[dst]] \n\t" 203a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld4.8 {d0-d3}, [%[src]] \n\t" 204a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // Thumb does not support the standard ARM conditional 205a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // instructions but instead requires the 'it' instruction 206a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // to signal conditional execution 207a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "it eq \n\t" 208a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "moveq ip, #8 \n\t" 209a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 210fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 211a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "add %[src], %[src], ip, LSL#2 \n\t" 212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "add %[dst], %[dst], ip, LSL#1 \n\t" 
213a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "subs %[count], %[count], ip \n\t" 214a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "b 9f \n\t" 215a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // LOOP 216a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "2: \n\t" 217fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 218a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {q12}, [%[dst]]! \n\t" 219a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld4.8 {d0-d3}, [%[src]]! \n\t" 220a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {q10}, [%[keep_dst]] \n\t" 221a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "sub %[keep_dst], %[dst], #8*2 \n\t" 222a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "subs %[count], %[count], #8 \n\t" 223a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "9: \n\t" 224a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "pld [%[dst],#32] \n\t" 225a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 226a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 227a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 228a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 229a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 230a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 231a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 232a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 233a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 234fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 235a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 236a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 
q13, d31 \n\t" 237a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 239a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 240a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 241a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 242a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 \n\t" 243a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 244a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 245a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 246a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 247a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 248a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 249a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d6, d6, d0 \n\t" // moved up 250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 251a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 252a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 253fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 254a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 255a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 256fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 257a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 258a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 259a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 
260a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 261a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 262a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 263fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 264a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "bne 2b \n\t" 265fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 266a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "1: \n\t" 267a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {q10}, [%[keep_dst]] \n\t" 268a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 269fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 270a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 271a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 272a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 273a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 274a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 275fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com else 276a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com { // handle count < 8 277efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 278fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 279a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 280a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 281a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 282fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 283a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst 
%[count], #4 \n\t" 284a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 14f \n\t" 285a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d25}, [%[dst]]! \n\t" 286a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {q1}, [%[src]]! \n\t" 287fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 288a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "14: \n\t" 289a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 290a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 12f \n\t" 291a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d24[1]}, [%[dst]]! \n\t" 292a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d1}, [%[src]]! \n\t" 293fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 294a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "12: \n\t" 295a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 296a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 11f \n\t" 297a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d24[1]}, [%[dst]]! \n\t" 298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d0[1]}, [%[src]]! 
\n\t" 299fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 300a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "11: \n\t" 301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // unzips achieve the same as a vld4 operation 302ea13afff6e46d8a969611cdd56c996bfb05a27c1thakis "vuzp.u16 q0, q1 \n\t" 303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d0, d1 \n\t" 304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d2, d3 \n\t" 305a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 308a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 310a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 314fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 316a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q13, d31 \n\t" 317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 318fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 320a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 
\n\t" 323a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 324a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d6, d6, d0 \n\t" // moved up 330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 333fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 336fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 338a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 339a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 340a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 343fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 344a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // store 345a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 
346a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 24f \n\t" 347a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d21}, [%[keep_dst]]! \n\t" 348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 349a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "24: \n\t" 350a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 351a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 22f \n\t" 352a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.32 {d20[1]}, [%[keep_dst]]! \n\t" 353fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 354a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "22: \n\t" 355a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 356a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 21f \n\t" 357a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d20[1]}, [%[keep_dst]]! \n\t" 358fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 359a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "21: \n\t" 360a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 361a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 362a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 363a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 364a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 365a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 366a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 367a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 3680d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3690d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit#else // #ifdef SK_CPU_ARM32 
3700d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3710d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petitvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 3720d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit const SkPMColor* SK_RESTRICT src, int count, 3730d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit U8CPU alpha, int /*x*/, int /*y*/) { 3740d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkASSERT(255 == alpha); 3750d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3760d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (count >= 16) { 3770d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit asm ( 3780d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "movi v4.8h, #0x80 \t\n" 3790d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit 3800d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "1: \t\n" 3810d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sub %[count], %[count], #16 \t\n" 3820d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n" 3830d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n" 3840d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "prfm pldl1keep, [%[src],#512] \t\n" 3850d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "prfm pldl1keep, [%[dst],#256] \t\n" 3860d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v20.8h, v17.8h, #5 \t\n" 3870d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v31.8h, v16.8h, #5 \t\n" 3880d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn v6.8b, v31.8h \t\n" 3890d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn2 v6.16b, v20.8h \t\n" 3900d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v20.8h, v17.8h, #11 \t\n" 3910d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shl v19.16b, v6.16b, #2 \t\n" 3920d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v31.8h, v16.8h, #11 \t\n" 3930d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn v22.8b, v31.8h \t\n" 
3940d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn2 v22.16b, v20.8h \t\n" 3950d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shl v18.16b, v22.16b, #3 \t\n" 3960d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mvn v3.16b, v3.16b \t\n" 3970d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn v16.8b, v16.8h \t\n" 3980d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v7.16b, v4.16b \t\n" 3990d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "xtn2 v16.16b, v17.8h \t\n" 4000d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal v7.8h, v3.8b, v19.8b \t\n" 4010d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shl v16.16b, v16.16b, #3 \t\n" 4020d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v22.16b, v4.16b \t\n" 4030d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v24.8h, v7.8h, #6 \t\n" 4040d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal v22.8h, v3.8b, v18.8b \t\n" 4050d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v20.8h, v22.8h, #5 \t\n" 4060d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn v20.8b, v22.8h, v20.8h \t\n" 4070d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "cmp %[count], #16 \t\n" 4080d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v6.16b, v4.16b \t\n" 4090d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v5.16b, v4.16b \t\n" 4100d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal v6.8h, v3.8b, v16.8b \t\n" 4110d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal2 v5.8h, v3.16b, v19.16b \t\n" 4120d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v17.16b, v4.16b \t\n" 4130d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v19.8h, v6.8h, #5 \t\n" 4140d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal2 v17.8h, v3.16b, v18.16b \t\n" 4150d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn v7.8b, v7.8h, v24.8h \t\n" 4160d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v18.8h, v5.8h, #6 \t\n" 
4170d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v21.8h, v17.8h, #5 \t\n" 4180d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn2 v7.16b, v5.8h, v18.8h \t\n" 4190d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn2 v20.16b, v17.8h, v21.8h \t\n" 4200d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "mov v22.16b, v4.16b \t\n" 4210d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn v6.8b, v6.8h, v19.8h \t\n" 4220d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "umlal2 v22.8h, v3.16b, v16.16b \t\n" 4230d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "ushr v5.8h, v22.8h, #5 \t\n" 4240d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "addhn2 v6.16b, v22.8h, v5.8h \t\n" 4250d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "uqadd v7.16b, v1.16b, v7.16b \t\n" 4260be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#if SK_PMCOLOR_BYTE_ORDER(B,G,R,A) 4270d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "uqadd v20.16b, v2.16b, v20.16b \t\n" 4280d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "uqadd v6.16b, v0.16b, v6.16b \t\n" 4290be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A) 4300be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit "uqadd v20.16b, v0.16b, v20.16b \t\n" 4310be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit "uqadd v6.16b, v2.16b, v6.16b \t\n" 4320be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#else 4330be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#error "This function only supports BGRA and RGBA." 
4340be677d35c7ef1bf8a7a694d1838fa11333d1beckevin.petit#endif 4350d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll v22.8h, v20.8b, #8 \t\n" 4360d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll v5.8h, v7.8b, #8 \t\n" 4370d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v22.8h, v5.8h, #5 \t\n" 4380d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll v17.8h, v6.8b, #8 \t\n" 4390d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll2 v23.8h, v20.16b, #8 \t\n" 4400d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll2 v7.8h, v7.16b, #8 \t\n" 4410d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v22.8h, v17.8h, #11 \t\n" 4420d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v23.8h, v7.8h, #5 \t\n" 4430d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "shll2 v6.8h, v6.16b, #8 \t\n" 4440d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "st1 {v22.8h}, [%[dst]], #16 \t\n" 4450d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "sri v23.8h, v6.8h, #11 \t\n" 4460d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "st1 {v23.8h}, [%[dst]], #16 \t\n" 4470d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "b.ge 1b \t\n" 4480d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count) 4490d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit :: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 4500d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", 4510d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "v31" 4520d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit ); 4530d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4540d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit // Leftovers 4550d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (count > 0) { 4560d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit do { 4570d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkPMColor c = *src++; 
4580d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkPMColorAssert(c); 4590d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (c) { 4600d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit *dst = SkSrcOver32To16(c, *dst); 4610d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4620d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit dst += 1; 4630d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } while (--count != 0); 4640d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4650d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit} 4660d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit#endif // #ifdef SK_CPU_ARM32 467a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 468be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.orgstatic inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { 469be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vdupq_n_u16(128); 470be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vshrq_n_u16(prod, 8); 471be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org return vshrq_n_u16(prod, 8); 472be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org} 473be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 474a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 475a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 476a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 477be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkASSERT(255 > alpha); 478a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 479be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org /* This code implements a Neon version of S32A_D565_Blend. The results have 480be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * a few mismatches compared to the original code. 
These mismatches never 481be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * exceed 1. 482a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 483fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 484be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (count >= 8) { 485be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t valpha_max, vmask_blue; 486be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8_t valpha; 487be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 488be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare constants 489be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha_max = vmovq_n_u16(255); 490be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha = vdup_n_u8(alpha); 491be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vmask_blue = vmovq_n_u16(SK_B16_MASK); 492be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 493be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org do { 494be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 495be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vres_a, vres_r, vres_g, vres_b; 496be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8x4_t vsrc; 497be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 498be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // load pixels 499be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst = vld1q_u16(dst); 500ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 501ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 502ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 503be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) 
504be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm ( 505be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 %h[vsrc], [%[src]]!" 506be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+&r" (src) 507be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 508be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 509a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#else 510be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 511be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 512be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 513be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 514be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 515be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm volatile ( 516be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 {d0-d3},[%[src]]!;" 517be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), 518be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org [src] "+&r" (src) 519be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 520be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 521be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[0] = d0; 522be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[1] = d1; 523be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[2] = d2; 524be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[3] = d3; 525a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 526ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM64 
527fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 528fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 529be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // deinterleave dst 530be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, SK_R16_BITS); // shift green to top of lanes 531be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_b = vdst & vmask_blue; // extract blue 532be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red 533be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green 534be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 535be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // shift src to 565 536be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); 537be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 8 - SK_G16_BITS); 538be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 8 - SK_B16_BITS); 539be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 540be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // calc src * src_scale 541be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = vmull_u8(vsrc.val[NEON_A], valpha); 542be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmull_u8(vsrc.val[NEON_R], valpha); 543be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmull_u8(vsrc.val[NEON_G], valpha); 544be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmull_u8(vsrc.val[NEON_B], valpha); 545be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 
546be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare dst_scale 547be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = SkDiv255Round_neon8(vres_a); 548be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = valpha_max - vres_a; // 255 - (sa * src_scale) / 255 549be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 550be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // add dst * dst_scale to previous result 551be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmlaq_u16(vres_r, vdst_r, vres_a); 552be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmlaq_u16(vres_g, vdst_g, vres_a); 553be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmlaq_u16(vres_b, vdst_b, vres_a); 554be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 555be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#ifdef S32A_D565_BLEND_EXACT 556be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // It is possible to get exact results with this but it is slow, 557be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // even slower than C code in some cases 558be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = SkDiv255Round_neon8(vres_r); 559be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = SkDiv255Round_neon8(vres_g); 560be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = SkDiv255Round_neon8(vres_b); 561be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#else 562be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vrshrq_n_u16(vres_r, 8); 563be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vrshrq_n_u16(vres_g, 8); 564be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vrshrq_n_u16(vres_b, 8); 565be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#endif 
566be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // pack result 567be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_g, SK_G16_SHIFT); // insert green into blue 568be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, SK_R16_SHIFT); // insert red into green/blue 569be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 570be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // store 571be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vst1q_u16(dst, vres_b); 572be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 8; 573be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org count -= 8; 574be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } while (count >= 8); 575be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 576a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 577be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // leftovers 578be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org while (count-- > 0) { 579be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkPMColor sc = *src++; 580be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (sc) { 581be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16_t dc = *dst; 582be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); 583be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) + SkMulS16(SkGetPackedR16(dc), dst_scale); 584be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) + SkMulS16(SkGetPackedG16(dc), dst_scale); 585be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) + 
SkMulS16(SkGetPackedB16(dc), dst_scale); 586be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); 587be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 588be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 1; 589a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 590a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 591a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 592a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* dither matrix for Neon, derived from gDitherMatrix_3Bit_16. 593a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * each dither value is spaced out into byte lanes, and repeated 594a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * to allow an 8-byte load from offsets 0, 1, 2 or 3 from the 595a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * start of each row. 596a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 597a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic const uint8_t gDitherMatrix_Neon[48] = { 598a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 599a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 600a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 601a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 602fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 603a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 604a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 605a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src, 606a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) 607a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 
608fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 6094cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkASSERT(255 > alpha); 6104cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6114cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // rescale alpha to range 1 - 256 612a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int scale = SkAlpha255To256(alpha); 613fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 6144cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count >= 8) { 6154cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org /* select row and offset for dither array */ 6164cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 617fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 6184cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither = vld1_u8(dstart); // load dither values 6194cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. 
green dither values 620fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 6214cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vscale = vdupq_n_s16(scale); // duplicate scale into neon reg 6224cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vmask_b = vdupq_n_u16(0x1F); // set up blue mask 623fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 6244cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 6254cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 626ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 6274cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc_r, vsrc_g, vsrc_b; 6284cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b; 6294cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b; 6304cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b; 6314cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst; 6324cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst_r, vdst_g, vdst_b; 6334cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vres_r, vres_g, vres_b; 6344cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int8x8_t vres8_r, vres8_g, vres8_b; 6354cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6364cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load source and add dither 637ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 638ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 639ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 6404cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org { 6414cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d0 
asm("d0"); 6424cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d1 asm("d1"); 6434cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d2 asm("d2"); 6444cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d3 asm("d3"); 6454cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6464cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org asm ( 647ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! " 6484cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 6494cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : 6504cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org ); 651ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 652ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 653ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 6544cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 655ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 656ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_r = vsrc.val[NEON_R]; 657ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_g = vsrc.val[NEON_G]; 658ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_b = vsrc.val[NEON_B]; 6594cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6604cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6 6614cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5 6624cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. 
blue >> 5 6634cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6644cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen 6654cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vaddl_u8(vsrc_r, vdither); // add in dither to red and widen 6664cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vaddl_u8(vsrc_b, vdither); // add in dither to blue and widen 6674cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6684cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r); // sub shifted red from result 6694cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vsubw_u8(vsrc_dit_g, vsrc565_g); // sub shifted green from result 6704cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vsubw_u8(vsrc_dit_b, vsrc565_b); // sub shifted blue from result 6714cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6724cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_r = vshrq_n_u16(vsrc_dit_r, 3); 6734cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_g = vshrq_n_u16(vsrc_dit_g, 2); 6744cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_b = vshrq_n_u16(vsrc_dit_b, 3); 6754cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6764cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load dst and unpack 6774cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst = vld1q_u16(dst); 6784cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_g = vshrq_n_u16(vdst, 5); // shift down to get green 6794cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_r = vshrq_n_u16(vshlq_n_u16(vdst, 5), 5+5); // double shift to extract red 6804cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_b = 
vandq_u16(vdst, vmask_b); // mask to get blue 6814cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6824cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // subtract dst from src and widen 6834cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_r), vreinterpretq_s16_u16(vdst_r)); 6844cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_g), vreinterpretq_s16_u16(vdst_g)); 6854cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_b), vreinterpretq_s16_u16(vdst_b)); 6864cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6874cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // multiply diffs by scale and shift 6884cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vmulq_s16(vres_r, vscale); 6894cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vmulq_s16(vres_g, vscale); 6904cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vmulq_s16(vres_b, vscale); 6914cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6924cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_r = vshrn_n_s16(vres_r, 8); 6934cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_g = vshrn_n_s16(vres_g, 8); 6944cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_b = vshrn_n_s16(vres_b, 8); 6954cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 6964cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // add dst to result 6974cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vaddw_s8(vreinterpretq_s16_u16(vdst_r), vres8_r); 6984cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vaddw_s8(vreinterpretq_s16_u16(vdst_g), vres8_g); 6994cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 
vres_b = vaddw_s8(vreinterpretq_s16_u16(vdst_b), vres8_b); 7004cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7014cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // put result into 565 format 7024cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_g, 5); // shift up green and insert into blue 7034cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_r, 6+5); // shift up red and insert into blue 7044cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7054cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Store result 7064cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vst1q_u16(dst, vreinterpretq_u16_s16(vres_b)); 7074cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7084cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Next iteration 7094cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org dst += 8; 7104cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org count -= 8; 7114cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7124cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (count >= 8); 7134cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 7144cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7154cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Leftovers 7164cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count > 0) { 7174cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int scale = SkAlpha255To256(alpha); 7184cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_565_SCAN(y); 7194cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 7204cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColor c = *src++; 7214cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColorAssert(c); 
7224cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7234cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int dither = DITHER_VALUE(x); 7244cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sr = SkGetPackedR32(c); 7254cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sg = SkGetPackedG32(c); 7264cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sb = SkGetPackedB32(c); 7274cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sr = SkDITHER_R32To565(sr, dither); 7284cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sg = SkDITHER_G32To565(sg, dither); 7294cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sb = SkDITHER_B32To565(sb, dither); 7304cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7314cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16_t d = *dst; 7324cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale), 7334cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sg, SkGetPackedG16(d), scale), 7344cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sb, SkGetPackedB16(d), scale)); 7354cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_INC_X(x); 7364cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (--count != 0); 737a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 738a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 739a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com

/* Src-over blend of two 32-bit pixels held in one 64-bit NEON register.
 *
 * For every byte lane this computes
 *     result = src + ((dst * (256 - a)) >> 8)
 * where a is the byte selected by alpha_idx for that lane. The callers pass
 * the table {3,3,3,3,7,7,7,7}, i.e. byte 3 of each pixel (the byte this file
 * treats as alpha) is broadcast to the four lanes of its own pixel.
 *
 * The final add wraps on byte-lane overflow; no saturation is applied.
 */
static inline uint8x8_t blend_srcover_pair_neon(uint8x8_t src_px,
                                                uint8x8_t dst_px,
                                                uint8x8_t alpha_idx) {
    // Spread each pixel's alpha byte over that pixel's four lanes.
    const uint8x8_t src_alpha = vtbl1_u8(src_px, alpha_idx);

    // 256 - a is (255 - a) + 1: the "a + 1" form of SkAlpha255To256()
    // applied to the inverse alpha, collapsed into one subtraction.
    const uint16x8_t dst_scale = vsubw_u8(vdupq_n_u16(256), src_alpha);

    // Widen dst to 16 bits, scale it, and narrow back with a >> 8.
    const uint16x8_t dst_wide = vmulq_u16(vmovl_u8(dst_px), dst_scale);
    const uint8x8_t dst_scaled = vshrn_n_u16(dst_wide, 8);

    return vadd_u8(src_px, dst_scaled);
}

/* Constant-alpha blend of two 32-bit pixels held in one 64-bit NEON register.
 *
 * For every byte lane this computes
 *     result = ((dst * dst_scale) >> 8) + ((src * src_scale) >> 8)
 * with both products formed in 16 bits. The add wraps on byte-lane overflow.
 */
static inline uint8x8_t blend_scaled_pair_neon(uint8x8_t src_px,
                                               uint8x8_t dst_px,
                                               uint16x8_t src_scale,
                                               uint8x8_t dst_scale) {
    const uint16x8_t src_wide = vmulq_u16(vmovl_u8(src_px), src_scale);
    const uint16x8_t dst_wide = vmull_u8(dst_px, dst_scale);

    return vadd_u8(vshrn_n_u16(dst_wide, 8), vshrn_n_u16(src_wide, 8));
}

/* Blends count pixels of src over dst, four pixels per NEON iteration.
 *
 * dst   - destination row, read and rewritten in place
 * src   - source row
 * count - number of pixels; nothing is done when count <= 0
 * alpha - must be 255 (asserted)
 *
 * Pixels left over after the four-wide loop go through SkPMSrcOver().
 */
void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
                                const SkPMColor* SK_RESTRICT src,
                                int count, U8CPU alpha) {
    SkASSERT(255 == alpha);

    if (count <= 0) {
        return;
    }

    // Table-lookup indices that replicate byte 3 / byte 7 across each pixel.
    static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
    const uint8x8_t alpha_mask = vld1_u8(alpha_mask_setup);

    const int kUnroll = 4;

    while (count >= kUnroll) {
        /* The two prefetches below may make the code slightly
         * slower for small values of count but are worth having
         * in the general case.
         */
        __builtin_prefetch(src + 32);
        __builtin_prefetch(dst + 32);

        // Load all four source and destination pixels before any store.
        const uint8x8_t src_lo = vreinterpret_u8_u32(vld1_u32(src));
        const uint8x8_t src_hi = vreinterpret_u8_u32(vld1_u32(src + 2));
        const uint8x8_t dst_lo = vreinterpret_u8_u32(vld1_u32(dst));
        const uint8x8_t dst_hi = vreinterpret_u8_u32(vld1_u32(dst + 2));

        const uint8x8_t out_lo = blend_srcover_pair_neon(src_lo, dst_lo, alpha_mask);
        const uint8x8_t out_hi = blend_srcover_pair_neon(src_hi, dst_hi, alpha_mask);

        vst1_u32(dst, vreinterpret_u32_u8(out_lo));
        vst1_u32(dst + 2, vreinterpret_u32_u8(out_hi));

        src += kUnroll;
        dst += kUnroll;
        count -= kUnroll;
    }

    /* do any residual iterations */
    while (--count >= 0) {
        *dst = SkPMSrcOver(*src, *dst);
        src += 1;
        dst += 1;
    }
}

/* Variant of S32A_Opaque_BlitRow32_neon() that inspects the top byte of each
 * source pixel and switches between three inner loops:
 *
 *   ALPHA_0        - top byte is 0x00: source pixels are skipped, dst untouched
 *   ALPHA_255      - top byte is 0xFF: source pixels are copied to dst
 *   ALPHA_1_TO_254 - anything else: four pixels per iteration are blended
 *                    with the NEON src-over helper
 *
 * The fast loops only run while src < src_end, which leaves at least
 * kUnroll + 2 pixels unread, so the look-ahead reads inside them stay within
 * the row. Whatever remains is finished one pixel at a time under TAIL.
 *
 * dst   - destination row, read and rewritten in place
 * src   - source row
 * count - number of pixels; nothing is done when count <= 0
 * alpha - must be 255 (asserted)
 */
void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
                                          const SkPMColor* SK_RESTRICT src,
                                          int count, U8CPU alpha) {
    SkASSERT(255 == alpha);

    if (count <= 0) {
        return;
    }

    /* Use these to check if src is transparent or opaque */
    const unsigned int ALPHA_OPAQ  = 0xFF000000;
    const unsigned int ALPHA_TRANS = 0x00FFFFFF;

    const int kUnroll = 4;

    // One past the last source pixel; TAIL runs up to here.
    const SkPMColor* const src_stop = src + count;

    // Limit for the fast loops. For short rows the limit is clamped to src
    // instead of being computed as src + count - (kUnroll + 1), which would
    // form a pointer in front of the row. Every comparison against src_end
    // gives the same answer either way: the row goes straight to TAIL.
    const SkPMColor* SK_RESTRICT src_end =
            (count > kUnroll + 1) ? src_stop - (kUnroll + 1) : src;

    // Start of the current run of transparent pixels (used by ALPHA_0).
    const SkPMColor* SK_RESTRICT run_start = src;

    /* set up the NEON variables */
    static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
    const uint8x8_t alpha_mask = vld1_u8(alpha_mask_setup);

    /* choose the first processing type */
    if (src >= src_end) {
        goto TAIL;
    }
    if (*src <= ALPHA_TRANS) {
        goto ALPHA_0;
    }
    if (*src >= ALPHA_OPAQ) {
        goto ALPHA_255;
    }
    /* fall-thru */

ALPHA_1_TO_254:
    do {
        // Load all four source and destination pixels before any store.
        const uint8x8_t src_lo = vreinterpret_u8_u32(vld1_u32(src));
        const uint8x8_t src_hi = vreinterpret_u8_u32(vld1_u32(src + 2));
        const uint8x8_t dst_lo = vreinterpret_u8_u32(vld1_u32(dst));
        const uint8x8_t dst_hi = vreinterpret_u8_u32(vld1_u32(dst + 2));

        const uint8x8_t out_lo = blend_srcover_pair_neon(src_lo, dst_lo, alpha_mask);
        const uint8x8_t out_hi = blend_srcover_pair_neon(src_hi, dst_hi, alpha_mask);

        vst1_u32(dst, vreinterpret_u32_u8(out_lo));
        vst1_u32(dst + 2, vreinterpret_u32_u8(out_hi));

        src += kUnroll;
        dst += kUnroll;

        /* if 2 of the next pixels aren't between 1 and 254
           it might make sense to go to the optimized loops */
        if ((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) ||
            (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)) {
            break;
        }
    } while (src < src_end);

    if (src >= src_end) {
        goto TAIL;
    }

    if (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ) {
        goto ALPHA_255;
    }

    /* fall-thru */

ALPHA_0:
    /* In this state, we know the current alpha is 0 and
       we optimize for the next alpha also being zero. */
    run_start = src;  // so we don't have to increment dst every time
    do {
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
    } while (src < src_end);

    // Skip dst past the whole transparent run in one step.
    dst += (src - run_start);

    /* no longer alpha 0, so determine where to go next. */
    if (src >= src_end) {
        goto TAIL;
    }
    if (*src >= ALPHA_OPAQ) {
        goto ALPHA_255;
    } else {
        goto ALPHA_1_TO_254;
    }

ALPHA_255:
    // AND-ing the four pixels keeps the top byte at 0xFF only if all four
    // have it, so one compare tests the whole group.
    while ((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) {
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
        dst[3] = src[3];
        src += kUnroll;
        dst += kUnroll;
        if (src >= src_end) {
            goto TAIL;
        }
    }

    // Handle remainder: copy up to three more leading opaque pixels.
    if (*src >= ALPHA_OPAQ) {
        *dst++ = *src++;
        if (*src >= ALPHA_OPAQ) {
            *dst++ = *src++;
            if (*src >= ALPHA_OPAQ) {
                *dst++ = *src++;
            }
        }
    }

    if (src >= src_end) {
        goto TAIL;
    }
    if (*src <= ALPHA_TRANS) {
        goto ALPHA_0;
    } else {
        goto ALPHA_1_TO_254;
    }

TAIL:
    /* do any residual iterations, up to the real end of the row */
    while (src != src_stop) {
        if (*src != 0) {
            if (*src >= ALPHA_OPAQ) {
                *dst = *src;
            } else {
                *dst = SkPMSrcOver(*src, *dst);
            }
        }
        src++;
        dst++;
    }

    return;
}

/* Neon version of S32_Blend_BlitRow32()
 * portable version is in src/core/SkBlitRow_D32.cpp
 *
 * Blends src into dst with one constant alpha for the whole row:
 *     dst = ((dst * dst_scale) >> 8) + ((src * src_scale) >> 8)
 * where src_scale = SkAlpha255To256(alpha) and dst_scale = 256 - src_scale.
 *
 * dst   - destination row, read and rewritten in place
 * src   - source row
 * count - number of pixels; nothing is done when count <= 0
 * alpha - constant alpha, at most 255 (asserted)
 */
void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
                              const SkPMColor* SK_RESTRICT src,
                              int count, U8CPU alpha) {
    SkASSERT(alpha <= 255);

    if (count <= 0) {
        return;
    }

    uint16_t src_scale = SkAlpha255To256(alpha);
    uint16_t dst_scale = 256 - src_scale;

    // Both scales are the same for every pixel, so broadcast them once.
    const uint16x8_t vsrc_scale = vdupq_n_u16(src_scale);
    const uint8x8_t vdst_scale = vdup_n_u8(dst_scale);

    /* Prefetching src+32 / dst+32 here is deliberately left out: it is a
     * big win for count values > 64 on an A9 (Pandaboard) but hurts by 10%
     * for count = 4, and also hurts a little (<5%) on an A15.
     */
    while (count >= 2) {
        // Two pixels per iteration.
        const uint8x8_t vsrc = vreinterpret_u8_u32(vld1_u32(src));
        const uint8x8_t vdst = vreinterpret_u8_u32(vld1_u32(dst));

        const uint8x8_t vres = blend_scaled_pair_neon(vsrc, vdst, vsrc_scale, vdst_scale);

        vst1_u32(dst, vreinterpret_u32_u8(vres));

        src += 2;
        dst += 2;
        count -= 2;
    }

    if (count == 1) {
        // One pixel left: load it into lane 0 of a zeroed register, run the
        // same math, and store only lane 0 back.
        uint8x8_t vsrc = vdup_n_u8(0);
        uint8x8_t vdst = vdup_n_u8(0);

        vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));
        vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));

        const uint8x8_t vres = blend_scaled_pair_neon(vsrc, vdst, vsrc_scale, vdst_scale);

        vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);
    }
}

1082a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 10833a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 10841fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.orgvoid S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 10851fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, 10861fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org int count, U8CPU alpha) { 10871fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org SkASSERT(255 >= alpha); 10891fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10901fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count <= 0) { 10911fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org return; 10921fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 10931fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10941fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned alpha256 = SkAlpha255To256(alpha); 10951fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10961fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // First deal with odd counts 10971fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count
& 1) { 10981fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 10991fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide; 11001fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned dst_scale; 11011fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11021fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 11031fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 11041fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 11051fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11061fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 11071fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = vget_lane_u8(vsrc, 3); 11081fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale *= alpha256; 11091fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale >>= 8; 11101fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = 256 - dst_scale; 11111fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11121fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 11131fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 11141fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); 11151fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11161fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 11171fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 11181fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmulq_n_u16(vdst_wide, 
dst_scale); 11191fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11201fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 11211fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 11221fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11231fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 11241fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst++; 11251fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src++; 11261fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count--; 11271fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 11281fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11291fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count) { 11301fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t alpha_mask; 11311fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 11321fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org alpha_mask = vld1_u8(alpha_mask_setup); 11331fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11341fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org do { 11351fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11361fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc, vdst, vres, vsrc_alphas; 11371fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; 11381fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11391fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(src+32); 11401fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(dst+32); 
11411fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11421fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 11431fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 11441fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 11451fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11461fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Prepare src_scale 11471fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_scale = vdupq_n_u16(alpha256); 11481fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11491fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 11501fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); 11511fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vmovl_u8(vsrc_alphas); 11521fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale *= vsrc_scale; 11531fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vshrq_n_u16(vdst_scale, 8); 11541fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale); 11551fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11561fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 11571fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 11581fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide *= vsrc_scale; 11591fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11601fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 11611fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 11621fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide *= vdst_scale; 
11631fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11641fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 11651fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); 11661fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11671fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 11681fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 11691fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src += 2; 11701fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst += 2; 11711fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count -= 2; 11721fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } while(count); 11731fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 11741fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org} 11751fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 1176a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1177a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1178fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef DEBUG_OPAQUE_DITHER 1179a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1180fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if defined(DEBUG_OPAQUE_DITHER) 1181a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme8(char *str, void *p, int len) 1182a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 1183fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static char buf[256]; 1184fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char tbuf[32]; 1185fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i; 1186fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char *pc = (char*) p; 
1187fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(buf,"%8s:", str); 1188fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for(i=0;i<len;i++) { 1189fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(tbuf, " %02x", pc[i]); 1190fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com strcat(buf, tbuf); 1191fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1192fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("%s\n", buf); 1193a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1194a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme16(char *str, void *p, int len) 1195a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{ 1196fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com static char buf[256]; 1197fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com char tbuf[32]; 1198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i; 1199fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t *pc = (uint16_t*) p; 1200fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(buf,"%8s:", str); 1201fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com len = (len / sizeof(uint16_t)); /* passed as bytes */ 1202fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for(i=0;i<len;i++) { 1203fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sprintf(tbuf, " %04x", pc[i]); 1204fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com strcat(buf, tbuf); 1205fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1206fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("%s\n", buf); 1207a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1208a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1209ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM32 1210a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1211a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, 1212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1213a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1214a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1215a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1216fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1217a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1218a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1219fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1220fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1221fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t tmpbuf[UNROLL]; 1222fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int td[UNROLL]; 1223fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int tdv[UNROLL]; 1224fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int ta[UNROLL]; 1225fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int tap[UNROLL]; 1226fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t in_dst[UNROLL]; 1227fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int offset = 0; 1228fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int noisy = 0; 1229a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1230a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1231fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint8x8_t dbase; 1232fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1233fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dbase = vld1_u8(dstart); 1234a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1235a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1236ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1237fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t sr, sg, sb, sa, d; 1238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8, scale8, alpha8; 1239fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst_r, dst_g, dst_b; 1240fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1241fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1242fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // calculate 8 elements worth into a temp buffer 1243fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org { 1244fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int my_y = y; 1245fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int my_x = x; 1246fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkPMColor* my_src = (SkPMColor*)src; 1247fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint16_t* my_dst = dst; 1248fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int i; 1249fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1250fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org DITHER_565_SCAN(my_y); 1251fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for(i = 0; i < UNROLL; i++) { 1252a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *my_src++; 1253a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1254a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1255a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1256fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1257a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(DITHER_VALUE(my_x), SkAlpha255To256(a)); 
1258fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tdv[i] = DITHER_VALUE(my_x); 1259fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ta[i] = a; 1260fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tap[i] = SkAlpha255To256(a); 1261fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org td[i] = d; 1262fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1263a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1264a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1265a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1266a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1267a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1268a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1269fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1270a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1271a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*my_dst); 1272a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1273a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1274a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com tmpbuf[i] = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1275fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org td[i] = d; 1276a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } else { 1277fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org tmpbuf[i] = *my_dst; 1278fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ta[i] = tdv[i] = td[i] = 0xbeef; 
1279fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1280fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org in_dst[i] = *my_dst; 1281a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com my_dst += 1; 1282a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(my_x); 1283fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1284fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1285a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1286a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1287ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1288ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 1289ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1290fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1291fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1292fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1293fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1294fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1295a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1296ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ("vld4.8 {d0-d3},[%[src]]! 
" 1297fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src) 1298fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : 1299fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ); 1300ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1301ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1302ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1303ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = d3; 1304fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1305ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1306ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sa = vsrc.val[NEON_A]; 1307ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1308ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1309ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1310a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1311fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org /* calculate 'd', which will be 0..7 1312fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * dbase[] is 0..7; alpha is 0..256; 16 bits suffice 1313fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1314fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmovl_u8(dbase); 1315fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmlal_u8(alpha8, sa, dbase); 1316fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org d = vshrn_n_u16(alpha8, 8); // narrowing too 1317fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1318fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sr = sr - (sr>>5) + d 1319fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* watching for 8-bit overflow. 
d is 0..7; risky range of 1320fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * sr is >248; and then (sr>>5) is 7 so it offsets 'd'; 1321fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * safe as long as we do ((sr-sr>>5) + d) 1322fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1323fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1324fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vadd_u8(sr, d); 1325fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1326fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sb = sb - (sb>>5) + d 1327fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1328fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vadd_u8(sb, d); 1329fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1330fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1331fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1332fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vadd_u8(sg, vshr_n_u8(d,1)); 1333fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1334fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // need to pick up 8 dst's -- at 16 bits each, 128 bits 1335fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vld1q_u16(dst); 1336fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_b = vandq_u16(dst8, vdupq_n_u16(SK_B16_MASK)); 1337fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_g = vshrq_n_u16(vshlq_n_u16(dst8, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS); 1338fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_r = vshrq_n_u16(dst8, SK_R16_SHIFT); // clearing hi bits 1339fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 
1340fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // blend 1341fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vsubw_u8(vdupq_n_u16(256), sa); 1342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1343fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // combine the addq and mul, save 3 insns 1344fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vshrq_n_u16(scale8, 3); 1345fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_b = vmlaq_u16(vshll_n_u8(sb,2), dst_b, scale8); 1346fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8); 1347fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8); 1348a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1349fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // repack to store 1350fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst8 = vshrq_n_u16(dst_b, 5); 1351fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5); 1352fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11); 1353fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1354fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1355fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1356fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER) 1357fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // verify my 8 elements match the temp buffer 1358fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org { 1359fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org int i, bad=0; 1360fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org static int invocation; 1361a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1362fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for (i = 0; i < UNROLL; i++) { 1363fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org if (tmpbuf[i] != dst[i]) { 1364fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org bad=1; 1365fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1366fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1367fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org if (bad) { 1368fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf("BAD S32A_D565_Opaque_Dither_neon(); invocation %d offset %d\n", 1369fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org invocation, offset); 1370fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf(" alpha 0x%x\n", alpha); 1371fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org for (i = 0; i < UNROLL; i++) 1372fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org SkDebugf("%2d: %s %04x w %04x id %04x s %08x d %04x %04x %04x %04x\n", 1373fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org i, ((tmpbuf[i] != dst[i])?"BAD":"got"), dst[i], tmpbuf[i], 1374fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org in_dst[i], src[i-8], td[i], tdv[i], tap[i], ta[i]); 1375fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1376fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("alpha8", &alpha8, sizeof(alpha8)); 1377fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("scale8", &scale8, sizeof(scale8)); 1378fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("d", &d, sizeof(d)); 1379fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst8", &dst8, sizeof(dst8)); 1380fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_b", &dst_b, sizeof(dst_b)); 1381fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_g", 
&dst_g, sizeof(dst_g)); 1382fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme16("dst_r", &dst_r, sizeof(dst_r)); 1383fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sb", &sb, sizeof(sb)); 1384fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sg", &sg, sizeof(sg)); 1385fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org showme8("sr", &sr, sizeof(sr)); 1386fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 1387fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org return; 1388fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1389fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org offset += UNROLL; 1390fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org invocation++; 1391fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org } 1392fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#endif 1393fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst += UNROLL; 1394fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 1395fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // skip x += UNROLL, since it's unchanged mod-4 1396a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (count >= UNROLL); 1397a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1398fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1399a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1400fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // residuals 1401a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1402a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1403a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1404a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1405a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 
1406a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1407a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1408fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1409a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither and alpha are just temporary variables to work-around 1410a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // an ICE in debug. 1411a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1412a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned alpha = SkAlpha255To256(a); 1413a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(dither, alpha); 1414fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1415a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1416a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1417a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1418a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1419a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1420a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1421fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1422a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1423a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1424a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1425a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1426a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst = SkCompact_rgb_16((src_expanded + dst_expanded) 
>> 5); 1427a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1428a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst += 1; 1429a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1430a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1431a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1432a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1433a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1434a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1435a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1436fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef DEBUG_S32_OPAQUE_DITHER 1437a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1438a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, 1439a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1440a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1441a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1442a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1443fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1444a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1445fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t d; 1446fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1447fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com d = vld1_u8(dstart); 1448fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1449fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com while (count >= UNROLL) { 1450efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint8x8_t sr, sg, sb; 
1451efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16x8_t dr, dg, db; 1452fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8; 1453ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1454fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1455ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1456ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 1457ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1458fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1459fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1460fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1461fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1462fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1463fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1464688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org asm ( 1465ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! 
" 1466688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 1467688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : 1468688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org ); 1469ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1470ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1471ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1472fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1473ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1474ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1475ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1476ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1477ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 1478fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* XXX: if we want to prefetch, hide it in the above asm() 1479fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * using the gcc __builtin_prefetch(), the prefetch will 1480fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * fall to the bottom of the loop -- it won't stick up 1481fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * at the top of the loop, just after the vld4. 
1482fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com */ 1483fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1484688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sr = sr - (sr>>5) + d 1485fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1486fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dr = vaddl_u8(sr, d); 1487fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1488688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sb = sb - (sb>>5) + d 1489fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1490fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com db = vaddl_u8(sb, d); 1491fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1492688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1493fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1494688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dg = vaddl_u8(sg, vshr_n_u8(d, 1)); 1495fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1496688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // pack high bits of each into 565 format (rgb, b is lsb) 1497fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vshrq_n_u16(db, 3); 1498fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5); 1499688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11); 1500fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1501688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // store it 1502fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1503fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
1504fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if defined(DEBUG_S32_OPAQUE_DITHER) 1505688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // always good to know if we generated good results 1506fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1507fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com int i, myx = x, myy = y; 1508fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com DITHER_565_SCAN(myy); 1509fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com for (i=0;i<UNROLL;i++) { 1510688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // the '!' in the asm block above post-incremented src by the 8 pixels it reads. 1511688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org SkPMColor c = src[i-8]; 1512fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com unsigned dither = DITHER_VALUE(myx); 1513fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16_t val = SkDitherRGB32To565(c, dither); 1514fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com if (val != dst[i]) { 1515fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x\n", 1516fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com c, dither, val, dst[i], dstart[i]); 1517fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1518fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com DITHER_INC_X(myx); 1519fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1520fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1521a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 1522a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1523fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst += UNROLL; 1524688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // we don't need to increment src as the asm above has already done it 1525fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
count -= UNROLL; 1526688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org x += UNROLL; // probably superfluous 1527fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1528a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1529fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1530a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1531688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // residuals 1532a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1533a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1534a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1535a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1536a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1537a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(SkGetPackedA32(c) == 255); 1538a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1539a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1540a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst++ = SkDitherRGB32To565(c, dither); 1541a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1542a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1543a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1544a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1545a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1546a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count, 1547a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor color) { 1548a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count <= 0) { 1549a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com return; 1550a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 
1551a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1552a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (0 == color) { 1553a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (src != dst) { 1554a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com memcpy(dst, src, count * sizeof(SkPMColor)); 1555a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1556a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com return; 1557a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1558a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1559a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned colorA = SkGetPackedA32(color); 1560a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (255 == colorA) { 1561a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sk_memset32(dst, color, count); 15625376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org return; 15635376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } 1564a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 15655376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org unsigned scale = 256 - SkAlpha255To256(colorA); 1566a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 15675376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org if (count >= 8) { 15685376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x4_t vcolor; 15695376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint8x8_t vscale; 15705376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15715376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vcolor = vdupq_n_u32(color); 15725376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15735376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // scale numerical interval [0-255], so load as 8 bits 15745376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vscale = vdup_n_u8(scale); 
15755376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15765376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org do { 15775376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // load src color, 8 pixels, 4 64 bit registers 15785376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // (and increment src). 15795376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x2x4_t vsrc; 15803a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) 15815376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org asm ( 15825376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org "vld1.32 %h[vsrc], [%[src]]!" 15835376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+r" (src) 15845376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : : 15855376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org ); 1586866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang 15875376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[0] = vld1_u32(src); 15885376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[1] = vld1_u32(src+2); 15895376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[2] = vld1_u32(src+4); 15905376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vsrc.val[3] = vld1_u32(src+6); 15915376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org src += 8; 15925376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif 15935376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 15945376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // multiply long by scale, 64 bits at a time, 15955376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // destination into a 128 bit register. 
15965376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint16x8x4_t vtmp; 15975376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[0] = vmull_u8(vreinterpret_u8_u32(vsrc.val[0]), vscale); 15985376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[1] = vmull_u8(vreinterpret_u8_u32(vsrc.val[1]), vscale); 15995376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[2] = vmull_u8(vreinterpret_u8_u32(vsrc.val[2]), vscale); 16005376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vtmp.val[3] = vmull_u8(vreinterpret_u8_u32(vsrc.val[3]), vscale); 16015376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 16025376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // shift the 128 bit registers, containing the 16 16035376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // bit scaled values back to 8 bits, narrowing the 16045376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // results to 64 bit registers. 16055376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint8x16x2_t vres; 16065376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vres.val[0] = vcombine_u8( 16075376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[0], 8), 16085376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[1], 8)); 16095376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vres.val[1] = vcombine_u8( 16105376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[2], 8), 16115376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vshrn_n_u16(vtmp.val[3], 8)); 16125376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 16135376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // adding back the color, using 128 bit registers. 
16145376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org uint32x4x2_t vdst; 16155376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vdst.val[0] = vreinterpretq_u32_u8(vres.val[0] + 16165376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vreinterpretq_u8_u32(vcolor)); 16175376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vdst.val[1] = vreinterpretq_u32_u8(vres.val[1] + 16185376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vreinterpretq_u8_u32(vcolor)); 16195376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 16205376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // store back the 8 calculated pixels (2 128 bit 16215376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org // registers), and increment dst. 16223a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))) 16235376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org asm ( 16245376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org "vst1.32 %h[vdst], [%[dst]]!" 
16255376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [dst] "+r" (dst) 16265376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : [vdst] "w" (vdst) 16275376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org : "memory" 16285376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org ); 1629866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang 16305376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vst1q_u32(dst, vdst.val[0]); 16315376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org vst1q_u32(dst+4, vdst.val[1]); 16325376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org dst += 8; 16335376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif 16345376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org count -= 8; 16355376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 16365376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } while (count >= 8); 16375376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org } 16385376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org 16395376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org while (count > 0) { 16405376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org *dst = color + SkAlphaMulQ(*src, scale); 16415376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org src += 1; 16425376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org dst += 1; 16435376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org count--; 1644a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1645a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1646a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1647a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1648a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1649a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = { 1650a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // no dither 16510060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org S32_D565_Opaque_neon, 165295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org S32_D565_Blend_neon, 1653a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_neon, 1654fa115bd4543631244f3b9accb3541b28f4222a96mtklein#if 0 1655a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Blend_neon, 1656fa115bd4543631244f3b9accb3541b28f4222a96mtklein#else 1657fa115bd4543631244f3b9accb3541b28f4222a96mtklein NULL, // https://code.google.com/p/skia/issues/detail?id=2845 1658fa115bd4543631244f3b9accb3541b28f4222a96mtklein // https://code.google.com/p/skia/issues/detail?id=2797 16595b2c2c6fd09752641b14766678d62fe50b4e3ef3reed#endif 1660a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1661a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither 1662a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Opaque_Dither_neon, 1663a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Blend_Dither_neon, 1664a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_Dither_neon, 1665a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com NULL, // S32A_D565_Blend_Dither 1666a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1667a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1668a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { 1669a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com NULL, // S32_Opaque, 1670fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com S32_Blend_BlitRow32_neon, // S32_Blend, 1671c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com /* 
1672c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * We have two choices for S32A_Opaque procs. The one reads the src alpha 1673c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * value and attempts to optimize accordingly. The optimization is 1674c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * sensitive to the source content and is not a win in all cases. For 1675c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * example, if there are a lot of transitions between the alpha states, 1676c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * the performance will almost certainly be worse. However, for many 1677c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * common cases the performance is equivalent or better than the standard 1678c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com * case where we do not inspect the src alpha. 1679c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com */ 1680c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#if SK_A32_SHIFT == 24 1681c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor 1682c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, 1683c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#else 1684c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com S32A_Opaque_BlitRow32_neon, // S32A_Opaque, 1685c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#endif 16863a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 16871fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org S32A_Blend_BlitRow32_neon // S32A_Blend 1688866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else 1689866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit NULL 1690866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#endif 1691a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 
1692