1a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* 2a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Copyright 2012 The Android Open Source Project 3a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * 4a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * Use of this source code is governed by a BSD-style license that can be 5a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * found in the LICENSE file. 6a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 7a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 8a111e492b84c312a6bd5d5d9ef100dca48f4941ddjsollen@google.com#include "SkBlitRow_opts_arm_neon.h" 9a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 10a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitMask.h" 11a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitRow.h" 12a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkColorPriv.h" 13a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkDither.h" 14a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkMathPriv.h" 15a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkUtils.h" 16a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 170060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org#include "SkColor_opts_neon.h" 18a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include <arm_neon.h> 19a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 20ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 21ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_3(const SkPMColor* SK_RESTRICT & src) { 22ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 23ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2; 24ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 25ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 
26ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 27ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 28ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 29ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 30ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 31ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [src] "+&r" (src) 32ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 33ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 34ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 35ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 36ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 37ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 38ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 39ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 40ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 41ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 42ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_4(const SkPMColor* SK_RESTRICT & src) { 43ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 44ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3; 45ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 46ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ( 47ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "ld4 {v0.8b - v3.8b}, [%[src]], #32 \t\n" 48ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc0].8b, v0.8b \t\n" 49ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc1].8b, v1.8b \t\n" 50ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc2].8b, v2.8b \t\n" 
51ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "mov %[vsrc3].8b, v3.8b \t\n" 52ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1), 53ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3), 54ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit [src] "+&r" (src) 55ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit : : "v0", "v1", "v2", "v3" 56ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit ); 57ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 58ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = vsrc_0; 59ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = vsrc_1; 60ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = vsrc_2; 61ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = vsrc_3; 62ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 63ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit return vsrc; 64ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit} 65ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 66ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 670060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.orgvoid S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 680060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 690060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 700060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkASSERT(255 == alpha); 710060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 720060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count >= 8) { 730060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint8x8x4_t vsrc; 740060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org uint16x8_t vdst; 750060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 
760060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Load 77ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 78ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 79ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 800060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vsrc = vld4_u8((uint8_t*)src); 81ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit src += 8; 82ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 830060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 840060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Convert src to 565 85bc25dfc798fff225ce65355ecda19d2b85bd0e74commit-bot@chromium.org vdst = SkPixel32ToPixel16_neon8(vsrc); 860060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 870060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Store 880060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org vst1q_u16(dst, vdst); 890060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 900060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Prepare next iteration 910060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst += 8; 920060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count -= 8; 930060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 940060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 950060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org // Leftovers 960060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org while (count > 0) { 970060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColor c = *src++; 980060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org SkPMColorAssert(c); 990060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org *dst = SkPixel32ToPixel16_ToU16(c); 1000060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org dst++; 
1010060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org count--; 1020060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org }; 1030060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org} 1040060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org 10595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.orgvoid S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 10695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, int count, 10795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org U8CPU alpha, int /*x*/, int /*y*/) { 10895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkASSERT(255 > alpha); 10995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vmask_blue, vscale; 11195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // prepare constants 11395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vscale = vdupq_n_u16(SkAlpha255To256(alpha)); 11495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vmask_blue = vmovq_n_u16(0x1F); 11595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 11695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org while (count >= 8) { 117ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 11895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 11995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16x8_t vres_r, vres_g, vres_b; 12095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 12195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load src 122ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 123ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 
124ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 12595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org { 12695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 12795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 12895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 12995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 13095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 13195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org asm ( 13295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org "vld4.8 {d0-d3},[%[src]]!" 13395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 13495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org : 13595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org ); 136ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 137ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 13995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 140ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 14195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 14295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Load and unpack dst 14395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst = vld1q_u16(dst); 14495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes 14595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_b = vandq_u16(vdst, vmask_blue); // extract blue 14695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, 6+5); // extract red 
14795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green 14895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 149ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit // Shift src to 565 range 150ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 3); 151ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 2); 152ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 3); 15395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Scale src - dst 155ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_r = vmovl_u8(vsrc.val[NEON_R]) - vdst_r; 156ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_g = vmovl_u8(vsrc.val[NEON_G]) - vdst_g; 157ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vres_b = vmovl_u8(vsrc.val[NEON_B]) - vdst_b; 15895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 15995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r = vshrq_n_u16(vres_r * vscale, 8); 16095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g = vshrq_n_u16(vres_g * vscale, 8); 16195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vshrq_n_u16(vres_b * vscale, 8); 16295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_r += vdst_r; 16495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_g += vdst_g; 16595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b += vdst_b; 16695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 16795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Combine 16895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, 
vres_g, 5); // insert green into blue 16995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue 17095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 17195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org // Store 17295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org vst1q_u16(dst, vres_b); 17395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org dst += 8; 17495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org count -= 8; 17595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 17695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org if (count > 0) { 17795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org int scale = SkAlpha255To256(alpha); 17895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org do { 17995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColor c = *src++; 18095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkPMColorAssert(c); 18195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org uint16_t d = *dst; 18295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org *dst++ = SkPackRGB16( 18395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale), 18495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale), 18595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale)); 18695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } while (--count != 0); 18795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org } 18895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org} 18995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org 190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32 
191a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst, 192a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 193a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 194a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 195a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 196a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= 8) { 1974b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein uint16_t* SK_RESTRICT keep_dst = 0; 198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 199a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 2004b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "ands ip, %[count], #7 \n\t" 2014b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmov.u8 d31, #1<<7 \n\t" 2024b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vld1.16 {q12}, [%[dst]] \n\t" 2034b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vld4.8 {d0-d3}, [%[src]] \n\t" 204a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // Thumb does not support the standard ARM conditional 205a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // instructions but instead requires the 'it' instruction 206a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // to signal conditional execution 2074b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "it eq \n\t" 2084b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "moveq ip, #8 \n\t" 2094b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "mov %[keep_dst], %[dst] \n\t" 2104b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein 2114b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "add %[src], %[src], ip, LSL#2 \n\t" 2124b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "add %[dst], %[dst], ip, LSL#1 \n\t" 2134b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "subs %[count], %[count], ip \n\t" 
2144b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "b 9f \n\t" 215a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // LOOP 216a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "2: \n\t" 217fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 2184b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vld1.16 {q12}, [%[dst]]! \n\t" 2194b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vld4.8 {d0-d3}, [%[src]]! \n\t" 2204b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vst1.16 {q10}, [%[keep_dst]] \n\t" 2214b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "sub %[keep_dst], %[dst], #8*2 \n\t" 2224b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "subs %[count], %[count], #8 \n\t" 223a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "9: \n\t" 2244b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "pld [%[dst],#32] \n\t" 225a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 2264b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmovn.u16 d4, q12 \n\t" 2274b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshr.u16 q11, q12, #5 \n\t" 2284b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshr.u16 q10, q12, #6+5 \n\t" 2294b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmovn.u16 d5, q11 \n\t" 2304b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmovn.u16 d6, q10 \n\t" 2314b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshl.u8 d4, d4, #3 \n\t" 2324b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshl.u8 d5, d5, #2 \n\t" 2334b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshl.u8 d6, d6, #3 \n\t" 2344b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein 2354b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmovl.u8 q14, d31 \n\t" 2364b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmovl.u8 q13, d31 \n\t" 2374b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmovl.u8 q12, d31 \n\t" 238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
239a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 2404b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmvn.8 d30, d3 \n\t" 2414b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmlal.u8 q14, d30, d6 \n\t" 2424b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmlal.u8 q13, d30, d5 \n\t" 2434b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vmlal.u8 q12, d30, d4 \n\t" 2444b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshr.u16 q8, q14, #5 \n\t" 2454b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshr.u16 q9, q13, #6 \n\t" 2464b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vaddhn.u16 d6, q14, q8 \n\t" 2474b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshr.u16 q8, q12, #5 \n\t" 2484b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vaddhn.u16 d5, q13, q9 \n\t" 2494b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vaddhn.u16 d4, q12, q8 \n\t" 250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 251a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 252fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 2533f55eed73f5af405909c2c10bff179d80526d423Mike Klein #ifdef SK_PMCOLOR_IS_RGBA 2544b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vqadd.u8 d6, d6, d0 \n\t" 2554b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vqadd.u8 d5, d5, d1 \n\t" 2564b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vqadd.u8 d4, d4, d2 \n\t" 2573f55eed73f5af405909c2c10bff179d80526d423Mike Klein #else 2584b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vqadd.u8 d6, d6, d2 \n\t" 2594b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vqadd.u8 d5, d5, d1 \n\t" 2604b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vqadd.u8 d4, d4, d0 \n\t" 2613f55eed73f5af405909c2c10bff179d80526d423Mike Klein #endif 262fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 263a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 
2644b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshll.u8 q10, d6, #8 \n\t" 2654b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshll.u8 q3, d5, #8 \n\t" 2664b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vshll.u8 q2, d4, #8 \n\t" 2674b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vsri.u16 q10, q3, #5 \n\t" 2684b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vsri.u16 q10, q2, #11 \n\t" 269fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 2704b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "bne 2b \n\t" 271fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 272a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "1: \n\t" 2734b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein "vst1.16 {q10}, [%[keep_dst]] \n\t" 274a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : [count] "+r" (count) 2754b19b403944dd4ab70507c0dea2aa3d38f145eacMike Klein : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src) 276a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", 277a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", 278a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "d30","d31" 279a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com ); 280a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 281fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com else 282a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com { // handle count < 8 283efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16_t* SK_RESTRICT keep_dst = 0; 284fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 285a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com asm volatile ( 286a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmov.u8 d31, #1<<7 \n\t" 287a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "mov %[keep_dst], %[dst] \n\t" 
288fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 289a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 290a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 14f \n\t" 291a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d25}, [%[dst]]! \n\t" 292a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {q1}, [%[src]]! \n\t" 293fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 294a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "14: \n\t" 295a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 296a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 12f \n\t" 297a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d24[1]}, [%[dst]]! \n\t" 298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d1}, [%[src]]! \n\t" 299fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 300a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "12: \n\t" 301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 302a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 11f \n\t" 303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.16 {d24[1]}, [%[dst]]! \n\t" 304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vld1.32 {d0[1]}, [%[src]]! 
\n\t" 305fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "11: \n\t" 307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // unzips achieve the same as a vld4 operation 308ea13afff6e46d8a969611cdd56c996bfb05a27c1thakis "vuzp.u16 q0, q1 \n\t" 309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d0, d1 \n\t" 310a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vuzp.u8 d2, d3 \n\t" 311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // expand 0565 q12 to 8888 {d4-d7} 312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d4, q12 \n\t" 313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q11, q12, #5 \n\t" 314a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q10, q12, #6+5 \n\t" 315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d5, q11 \n\t" 316a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovn.u16 d6, q10 \n\t" 317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d4, d4, #3 \n\t" 318a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d5, d5, #2 \n\t" 319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshl.u8 d6, d6, #3 \n\t" 320fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q14, d31 \n\t" 322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q13, d31 \n\t" 323a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmovl.u8 q12, d31 \n\t" 324fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // duplicate in 4/2/1 & 8pix vsns 326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmvn.8 d30, d3 \n\t" 327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q14, d30, d6 \n\t" 328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q13, d30, d5 
\n\t" 329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vmlal.u8 q12, d30, d4 \n\t" 330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q14, #5 \n\t" 331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q9, q13, #6 \n\t" 332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d6, q14, q8 \n\t" 333a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshr.u16 q8, q12, #5 \n\t" 334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d5, q13, q9 \n\t" 335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vaddhn.u16 d4, q12, q8 \n\t" 336a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // intentionally don't calculate alpha 337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // result in d4-d6 338fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 3393f55eed73f5af405909c2c10bff179d80526d423Mike Klein #ifdef SK_PMCOLOR_IS_RGBA 3403f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d6, d6, d0 \n\t" 341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d5, d5, d1 \n\t" 342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vqadd.u8 d4, d4, d2 \n\t" 3433f55eed73f5af405909c2c10bff179d80526d423Mike Klein #else 3443f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d6, d6, d2 \n\t" 3453f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d5, d5, d1 \n\t" 3463f55eed73f5af405909c2c10bff179d80526d423Mike Klein "vqadd.u8 d4, d4, d0 \n\t" 3473f55eed73f5af405909c2c10bff179d80526d423Mike Klein #endif 348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 349a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // pack 8888 {d4-d6} to 0565 q10 350a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q10, d6, #8 \n\t" 351a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q3, d5, #8 \n\t" 352a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vshll.u8 q2, d4, #8 \n\t" 
353a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q3, #5 \n\t" 354a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vsri.u16 q10, q2, #11 \n\t" 355fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 356a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // store 357a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #4 \n\t" 358a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 24f \n\t" 359a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d21}, [%[keep_dst]]! \n\t" 360fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 361a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "24: \n\t" 362a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #2 \n\t" 363a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 22f \n\t" 364a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.32 {d20[1]}, [%[keep_dst]]! \n\t" 365fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 366a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "22: \n\t" 367a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "tst %[count], #1 \n\t" 368a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "beq 21f \n\t" 369a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com "vst1.16 {d20[1]}, [%[keep_dst]]! 
/*
 * AArch64 variant of S32A_D565_Opaque: composites `count` 32-bit SkPMColor
 * source pixels onto a row of 16-bit 565 destination pixels, in place.
 *
 *  dst    destination row; read, blended and written back
 *  src    source row
 *  count  number of pixels to process
 *  alpha  global alpha; must be 255 (asserted), not otherwise used
 *  x, y   unused
 *
 * Rows of 16 pixels or more run through the inline-assembly loop below,
 * 16 pixels per iteration; whatever remains is handled one pixel at a time
 * with SkSrcOver32To16().
 *
 * Register roles inside the assembly loop:
 *   v4        0x0080 in every 16-bit lane; copied into each accumulator
 *             before the widening multiply-accumulate
 *   v16, v17  the 16 destination pixels as loaded (8 per register)
 *   v0..v3    the four byte planes of the 16 source pixels (ld4 de-interleave);
 *             v3 is the fourth byte of each pixel and is inverted with mvn,
 *             giving 255 - value
 *   v19       bits 5..10 of each dst pixel, moved to the top 6 bits of a byte
 *   v18       bits 11..15 of each dst pixel, moved to the top 5 bits of a byte
 *   v16       (after xtn/xtn2/shl) bits 0..4 of each dst pixel, moved to the
 *             top 5 bits of a byte
 *   v7, v20, v6   per-field results after scaling; the matching source plane
 *             is then added with unsigned saturation (uqadd)
 *   v22, v23  the repacked 565 results that are stored
 * Each scaled field is computed as the high byte of (t + (t >> s)), where
 * t = 0x80 + (255 - v3) * field and s is 6 for the 6-bit field and 5 for the
 * 5-bit fields.
 */
void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
                           const SkPMColor* SK_RESTRICT src, int count,
                           U8CPU alpha, int /*x*/, int /*y*/) {
    SkASSERT(255 == alpha);

    if (count >= 16) {
        asm (
            "movi v4.8h, #0x80 \t\n"

            "1: \t\n"
            "sub %w[count], %w[count], #16 \t\n"
            // Load 16 dst pixels (dst is only advanced by the stores at the
            // bottom) and 16 src pixels (src advances by 64 bytes here).
            "ld1 {v16.8h-v17.8h}, [%[dst]] \t\n"
            "ld4 {v0.16b-v3.16b}, [%[src]], #64 \t\n"
            // Prefetch ahead in both rows.
            "prfm pldl1keep, [%[src],#512] \t\n"
            "prfm pldl1keep, [%[dst],#256] \t\n"
            // Split the dst pixels into three byte vectors (v19, v18, v16).
            "ushr v20.8h, v17.8h, #5 \t\n"
            "ushr v31.8h, v16.8h, #5 \t\n"
            "xtn v6.8b, v31.8h \t\n"
            "xtn2 v6.16b, v20.8h \t\n"
            "ushr v20.8h, v17.8h, #11 \t\n"
            "shl v19.16b, v6.16b, #2 \t\n"
            "ushr v31.8h, v16.8h, #11 \t\n"
            "xtn v22.8b, v31.8h \t\n"
            "xtn2 v22.16b, v20.8h \t\n"
            "shl v18.16b, v22.16b, #3 \t\n"
            // v3 = 255 - fourth source byte plane.
            "mvn v3.16b, v3.16b \t\n"
            "xtn v16.8b, v16.8h \t\n"
            "mov v7.16b, v4.16b \t\n"
            "xtn2 v16.16b, v17.8h \t\n"
            // From here on: accumulator = 0x80 + v3 * field, then
            // high byte of (accumulator + (accumulator >> s)).
            "umlal v7.8h, v3.8b, v19.8b \t\n"
            "shl v16.16b, v16.16b, #3 \t\n"
            "mov v22.16b, v4.16b \t\n"
            "ushr v24.8h, v7.8h, #6 \t\n"
            "umlal v22.8h, v3.8b, v18.8b \t\n"
            "ushr v20.8h, v22.8h, #5 \t\n"
            "addhn v20.8b, v22.8h, v20.8h \t\n"
            // Sets the flags consumed by the b.ge at the bottom of the loop.
            "cmp %w[count], #16 \t\n"
            "mov v6.16b, v4.16b \t\n"
            "mov v5.16b, v4.16b \t\n"
            "umlal v6.8h, v3.8b, v16.8b \t\n"
            "umlal2 v5.8h, v3.16b, v19.16b \t\n"
            "mov v17.16b, v4.16b \t\n"
            "ushr v19.8h, v6.8h, #5 \t\n"
            "umlal2 v17.8h, v3.16b, v18.16b \t\n"
            "addhn v7.8b, v7.8h, v24.8h \t\n"
            "ushr v18.8h, v5.8h, #6 \t\n"
            "ushr v21.8h, v17.8h, #5 \t\n"
            "addhn2 v7.16b, v5.8h, v18.8h \t\n"
            "addhn2 v20.16b, v17.8h, v21.8h \t\n"
            "mov v22.16b, v4.16b \t\n"
            "addhn v6.8b, v6.8h, v19.8h \t\n"
            "umlal2 v22.8h, v3.16b, v16.16b \t\n"
            "ushr v5.8h, v22.8h, #5 \t\n"
            "addhn2 v6.16b, v22.8h, v5.8h \t\n"
            // Add the source planes with saturation. Plane 1 always pairs
            // with the 6-bit field; planes 0 and 2 swap between the two
            // 5-bit fields depending on the pixel byte order.
            "uqadd v7.16b, v1.16b, v7.16b \t\n"
#if SK_PMCOLOR_BYTE_ORDER(B,G,R,A)
            "uqadd v20.16b, v2.16b, v20.16b \t\n"
            "uqadd v6.16b, v0.16b, v6.16b \t\n"
#elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A)
            "uqadd v20.16b, v0.16b, v20.16b \t\n"
            "uqadd v6.16b, v2.16b, v6.16b \t\n"
#else
#error "This function only supports BGRA and RGBA."
#endif
            // Repack the three byte vectors into 16-bit pixels: shll puts a
            // byte in the top half of each lane, sri inserts the next field
            // below it (#5, then #11).
            "shll v22.8h, v20.8b, #8 \t\n"
            "shll v5.8h, v7.8b, #8 \t\n"
            "sri v22.8h, v5.8h, #5 \t\n"
            "shll v17.8h, v6.8b, #8 \t\n"
            "shll2 v23.8h, v20.16b, #8 \t\n"
            "shll2 v7.8h, v7.16b, #8 \t\n"
            "sri v22.8h, v17.8h, #11 \t\n"
            "sri v23.8h, v7.8h, #5 \t\n"
            "shll2 v6.8h, v6.16b, #8 \t\n"
            // Store 2 x 8 pixels; each store advances dst by 16 bytes.
            "st1 {v22.8h}, [%[dst]], #16 \t\n"
            "sri v23.8h, v6.8h, #11 \t\n"
            "st1 {v23.8h}, [%[dst]], #16 \t\n"
            // Loop while the count compared above was still >= 16.
            "b.ge 1b \t\n"
            : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count)
            :: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
               "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24",
               "v31"
        );
    }
    // Leftovers
    if (count > 0) {
        do {
            SkPMColor c = *src++;
            SkPMColorAssert(c);
            // A source pixel of 0 leaves the destination pixel untouched.
            if (c) {
                *dst = SkSrcOver32To16(c, *dst);
            }
            dst += 1;
        } while (--count != 0);
    }
}
4630d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit "v31" 4640d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit ); 4650d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4660d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit // Leftovers 4670d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (count > 0) { 4680d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit do { 4690d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkPMColor c = *src++; 4700d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit SkPMColorAssert(c); 4710d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit if (c) { 4720d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit *dst = SkSrcOver32To16(c, *dst); 4730d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4740d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit dst += 1; 4750d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } while (--count != 0); 4760d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit } 4770d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit} 4780d448303099037d56d8f97ce87d8cae05dd9fdabkevin.petit#endif // #ifdef SK_CPU_ARM32 479a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 480402448d6818cab9d7b7633a0c18fcf574c915357mleestatic uint32_t pmcolor_to_expand16(SkPMColor c) { 481402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned r = SkGetPackedR32(c); 482402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned g = SkGetPackedG32(c); 483402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned b = SkGetPackedB32(c); 484402448d6818cab9d7b7633a0c18fcf574c915357mlee return (g << 24) | (r << 13) | (b << 2); 485402448d6818cab9d7b7633a0c18fcf574c915357mlee} 486402448d6818cab9d7b7633a0c18fcf574c915357mlee 487402448d6818cab9d7b7633a0c18fcf574c915357mleevoid Color32A_D565_neon(uint16_t dst[], SkPMColor src, int count, int x, int y) { 488402448d6818cab9d7b7633a0c18fcf574c915357mlee uint32_t src_expand; 489402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned scale; 490402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t vmask_blue; 
491402448d6818cab9d7b7633a0c18fcf574c915357mlee 492402448d6818cab9d7b7633a0c18fcf574c915357mlee if (count <= 0) return; 493402448d6818cab9d7b7633a0c18fcf574c915357mlee SkASSERT(((size_t)dst & 0x01) == 0); 494402448d6818cab9d7b7633a0c18fcf574c915357mlee 495402448d6818cab9d7b7633a0c18fcf574c915357mlee /* 496402448d6818cab9d7b7633a0c18fcf574c915357mlee * This preamble code is in order to make dst aligned to 8 bytes 497402448d6818cab9d7b7633a0c18fcf574c915357mlee * in the next mutiple bytes read & write access. 498402448d6818cab9d7b7633a0c18fcf574c915357mlee */ 499402448d6818cab9d7b7633a0c18fcf574c915357mlee src_expand = pmcolor_to_expand16(src); 500402448d6818cab9d7b7633a0c18fcf574c915357mlee scale = SkAlpha255To256(0xFF - SkGetPackedA32(src)) >> 3; 501402448d6818cab9d7b7633a0c18fcf574c915357mlee 502402448d6818cab9d7b7633a0c18fcf574c915357mlee#define DST_ALIGN 8 503402448d6818cab9d7b7633a0c18fcf574c915357mlee 504402448d6818cab9d7b7633a0c18fcf574c915357mlee /* 505402448d6818cab9d7b7633a0c18fcf574c915357mlee * preamble_size is in byte, meantime, this blend32_16_row_neon updates 2 bytes at a time. 
506402448d6818cab9d7b7633a0c18fcf574c915357mlee */ 507402448d6818cab9d7b7633a0c18fcf574c915357mlee int preamble_size = (DST_ALIGN - (size_t)dst) & (DST_ALIGN - 1); 508402448d6818cab9d7b7633a0c18fcf574c915357mlee 509402448d6818cab9d7b7633a0c18fcf574c915357mlee for (int i = 0; i < preamble_size; i+=2, dst++) { 510402448d6818cab9d7b7633a0c18fcf574c915357mlee uint32_t dst_expand = SkExpand_rgb_16(*dst) * scale; 511402448d6818cab9d7b7633a0c18fcf574c915357mlee *dst = SkCompact_rgb_16((src_expand + dst_expand) >> 5); 512402448d6818cab9d7b7633a0c18fcf574c915357mlee if (--count == 0) 513402448d6818cab9d7b7633a0c18fcf574c915357mlee break; 514402448d6818cab9d7b7633a0c18fcf574c915357mlee } 515402448d6818cab9d7b7633a0c18fcf574c915357mlee 516402448d6818cab9d7b7633a0c18fcf574c915357mlee int count16 = 0; 517402448d6818cab9d7b7633a0c18fcf574c915357mlee count16 = count >> 4; 518402448d6818cab9d7b7633a0c18fcf574c915357mlee vmask_blue = vmovq_n_u16(SK_B16_MASK); 519402448d6818cab9d7b7633a0c18fcf574c915357mlee 520402448d6818cab9d7b7633a0c18fcf574c915357mlee if (count16) { 521402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_sr; 522402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_sg; 523402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_sb; 524402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t wide_256_sa; 525402448d6818cab9d7b7633a0c18fcf574c915357mlee 526402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sr = SkGetPackedR32(src); 527402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sg = SkGetPackedG32(src); 528402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sb = SkGetPackedB32(src); 529402448d6818cab9d7b7633a0c18fcf574c915357mlee unsigned sa = SkGetPackedA32(src); 530402448d6818cab9d7b7633a0c18fcf574c915357mlee 531402448d6818cab9d7b7633a0c18fcf574c915357mlee // Operation: dst_rgb = src_rgb + ((256 - src_a) >> 3) x dst_rgb 532402448d6818cab9d7b7633a0c18fcf574c915357mlee // sr: 8-bit based, dr: 5-bit based, with dr x ((256-sa)>>3), 
5-bit left shifted, 533402448d6818cab9d7b7633a0c18fcf574c915357mlee //thus, for sr, do 2-bit left shift to match MSB : (8 + 2 = 5 + 5) 534402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_sr = vshlq_n_u16(vmovl_u8(vdup_n_u8(sr)), 2); // widen and src_red shift 535402448d6818cab9d7b7633a0c18fcf574c915357mlee 536402448d6818cab9d7b7633a0c18fcf574c915357mlee // sg: 8-bit based, dg: 6-bit based, with dg x ((256-sa)>>3), 5-bit left shifted, 537402448d6818cab9d7b7633a0c18fcf574c915357mlee //thus, for sg, do 3-bit left shift to match MSB : (8 + 3 = 6 + 5) 538402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_sg = vshlq_n_u16(vmovl_u8(vdup_n_u8(sg)), 3); // widen and src_grn shift 539402448d6818cab9d7b7633a0c18fcf574c915357mlee 540402448d6818cab9d7b7633a0c18fcf574c915357mlee // sb: 8-bit based, db: 5-bit based, with db x ((256-sa)>>3), 5-bit left shifted, 541402448d6818cab9d7b7633a0c18fcf574c915357mlee //thus, for sb, do 2-bit left shift to match MSB : (8 + 2 = 5 + 5) 542402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_sb = vshlq_n_u16(vmovl_u8(vdup_n_u8(sb)), 2); // widen and src blu shift 543402448d6818cab9d7b7633a0c18fcf574c915357mlee 544402448d6818cab9d7b7633a0c18fcf574c915357mlee wide_256_sa = 545402448d6818cab9d7b7633a0c18fcf574c915357mlee vshrq_n_u16(vsubw_u8(vdupq_n_u16(256), vdup_n_u8(sa)), 3); // (256 - sa) >> 3 546402448d6818cab9d7b7633a0c18fcf574c915357mlee 547402448d6818cab9d7b7633a0c18fcf574c915357mlee while (count16-- > 0) { 548402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t vdst1, vdst1_r, vdst1_g, vdst1_b; 549402448d6818cab9d7b7633a0c18fcf574c915357mlee uint16x8_t vdst2, vdst2_r, vdst2_g, vdst2_b; 550402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1 = vld1q_u16(dst); 551402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 8; 552402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2 = vld1q_u16(dst); 553402448d6818cab9d7b7633a0c18fcf574c915357mlee dst -= 8; //to store dst again. 
554402448d6818cab9d7b7633a0c18fcf574c915357mlee 555402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vshlq_n_u16(vdst1, SK_R16_BITS); // shift green to top of lanes 556402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_b = vdst1 & vmask_blue; // extract blue 557402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_r = vshrq_n_u16(vdst1, SK_R16_SHIFT); // extract red 558402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vshrq_n_u16(vdst1_g, SK_R16_BITS + SK_B16_BITS); // extract green 559402448d6818cab9d7b7633a0c18fcf574c915357mlee 560402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vshlq_n_u16(vdst2, SK_R16_BITS); // shift green to top of lanes 561402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_b = vdst2 & vmask_blue; // extract blue 562402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_r = vshrq_n_u16(vdst2, SK_R16_SHIFT); // extract red 563402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vshrq_n_u16(vdst2_g, SK_R16_BITS + SK_B16_BITS); // extract green 564402448d6818cab9d7b7633a0c18fcf574c915357mlee 565402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_r = vmlaq_u16(wide_sr, wide_256_sa, vdst1_r); // sr + (256-sa) x dr1 566402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vmlaq_u16(wide_sg, wide_256_sa, vdst1_g); // sg + (256-sa) x dg1 567402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_b = vmlaq_u16(wide_sb, wide_256_sa, vdst1_b); // sb + (256-sa) x db1 568402448d6818cab9d7b7633a0c18fcf574c915357mlee 569402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_r = vmlaq_u16(wide_sr, wide_256_sa, vdst2_r); // sr + (256-sa) x dr2 570402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vmlaq_u16(wide_sg, wide_256_sa, vdst2_g); // sg + (256-sa) x dg2 571402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_b = vmlaq_u16(wide_sb, wide_256_sa, vdst2_b); // sb + (256-sa) x db2 572402448d6818cab9d7b7633a0c18fcf574c915357mlee 573402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_r = vshrq_n_u16(vdst1_r, 5); // 5-bit right shift for 5-bit red 
574402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_g = vshrq_n_u16(vdst1_g, 5); // 5-bit right shift for 6-bit green 575402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1_b = vshrq_n_u16(vdst1_b, 5); // 5-bit right shift for 5-bit blue 576402448d6818cab9d7b7633a0c18fcf574c915357mlee 577402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1 = vsliq_n_u16(vdst1_b, vdst1_g, SK_G16_SHIFT); // insert green into blue 578402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst1 = vsliq_n_u16(vdst1, vdst1_r, SK_R16_SHIFT); // insert red into green/blue 579402448d6818cab9d7b7633a0c18fcf574c915357mlee 580402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_r = vshrq_n_u16(vdst2_r, 5); // 5-bit right shift for 5-bit red 581402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_g = vshrq_n_u16(vdst2_g, 5); // 5-bit right shift for 6-bit green 582402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2_b = vshrq_n_u16(vdst2_b, 5); // 5-bit right shift for 5-bit blue 583402448d6818cab9d7b7633a0c18fcf574c915357mlee 584402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2 = vsliq_n_u16(vdst2_b, vdst2_g, SK_G16_SHIFT); // insert green into blue 585402448d6818cab9d7b7633a0c18fcf574c915357mlee vdst2 = vsliq_n_u16(vdst2, vdst2_r, SK_R16_SHIFT); // insert red into green/blue 586402448d6818cab9d7b7633a0c18fcf574c915357mlee 587402448d6818cab9d7b7633a0c18fcf574c915357mlee vst1q_u16(dst, vdst1); 588402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 8; 589402448d6818cab9d7b7633a0c18fcf574c915357mlee vst1q_u16(dst, vdst2); 590402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 8; 591402448d6818cab9d7b7633a0c18fcf574c915357mlee } 592402448d6818cab9d7b7633a0c18fcf574c915357mlee } 593402448d6818cab9d7b7633a0c18fcf574c915357mlee 594402448d6818cab9d7b7633a0c18fcf574c915357mlee count &= 0xF; 595402448d6818cab9d7b7633a0c18fcf574c915357mlee if (count > 0) { 596402448d6818cab9d7b7633a0c18fcf574c915357mlee do { 597402448d6818cab9d7b7633a0c18fcf574c915357mlee uint32_t dst_expand = SkExpand_rgb_16(*dst) * scale; 
598402448d6818cab9d7b7633a0c18fcf574c915357mlee *dst = SkCompact_rgb_16((src_expand + dst_expand) >> 5); 599402448d6818cab9d7b7633a0c18fcf574c915357mlee dst += 1; 600402448d6818cab9d7b7633a0c18fcf574c915357mlee } while (--count != 0); 601402448d6818cab9d7b7633a0c18fcf574c915357mlee } 602402448d6818cab9d7b7633a0c18fcf574c915357mlee} 603402448d6818cab9d7b7633a0c18fcf574c915357mlee 604be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.orgstatic inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) { 605be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vdupq_n_u16(128); 606be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org prod += vshrq_n_u16(prod, 8); 607be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org return vshrq_n_u16(prod, 8); 608be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org} 609be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 610a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, 611a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, int count, 612a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com U8CPU alpha, int /*x*/, int /*y*/) { 613be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkASSERT(255 > alpha); 614a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 615be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org /* This code implements a Neon version of S32A_D565_Blend. The results have 616be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * a few mismatches compared to the original code. These mismatches never 617be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org * exceed 1. 
618a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 619fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 620be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (count >= 8) { 621be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t valpha_max, vmask_blue; 622be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8_t valpha; 623be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 624be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare constants 625be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha_max = vmovq_n_u16(255); 626be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org valpha = vdup_n_u8(alpha); 627be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vmask_blue = vmovq_n_u16(SK_B16_MASK); 628be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 629be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org do { 630be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vdst, vdst_r, vdst_g, vdst_b; 631be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16x8_t vres_a, vres_r, vres_g, vres_b; 632be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint8x8x4_t vsrc; 633be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 634be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // load pixels 635be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst = vld1q_u16(dst); 636ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 637ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 638e683e810a35e4ea91b00104590e15a56cb35ad39mtklein#elif (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) 639be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm ( 640be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 %h[vsrc], [%[src]]!" 
641be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : [vsrc] "=w" (vsrc), [src] "+&r" (src) 642be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 643be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 644a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#else 645be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d0 asm("d0"); 646be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d1 asm("d1"); 647be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d2 asm("d2"); 648be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org register uint8x8_t d3 asm("d3"); 649be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 650be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org asm volatile ( 651be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org "vld4.u8 {d0-d3},[%[src]]!;" 652be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), 653be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org [src] "+&r" (src) 654be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org : : 655be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org ); 656be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[0] = d0; 657be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[1] = d1; 658be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[2] = d2; 659be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[3] = d3; 660a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif 661fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 662fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 663be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // deinterleave dst 664be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = 
vshlq_n_u16(vdst, SK_R16_BITS); // shift green to top of lanes 665be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_b = vdst & vmask_blue; // extract blue 666be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_r = vshrq_n_u16(vdst, SK_R16_SHIFT); // extract red 667be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vdst_g = vshrq_n_u16(vdst_g, SK_R16_BITS + SK_B16_BITS); // extract green 668be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 669be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // shift src to 565 670be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 8 - SK_R16_BITS); 671be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 8 - SK_G16_BITS); 672be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 8 - SK_B16_BITS); 673be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 674be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // calc src * src_scale 675be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = vmull_u8(vsrc.val[NEON_A], valpha); 676be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmull_u8(vsrc.val[NEON_R], valpha); 677be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmull_u8(vsrc.val[NEON_G], valpha); 678be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmull_u8(vsrc.val[NEON_B], valpha); 679be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 680be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // prepare dst_scale 681be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = SkDiv255Round_neon8(vres_a); 682be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_a = valpha_max - vres_a; // 255 - (sa * src_scale) / 255 
683be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 684be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // add dst * dst_scale to previous result 685be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vmlaq_u16(vres_r, vdst_r, vres_a); 686be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vmlaq_u16(vres_g, vdst_g, vres_a); 687be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vmlaq_u16(vres_b, vdst_b, vres_a); 688be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 689be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#ifdef S32A_D565_BLEND_EXACT 690be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // It is possible to get exact results with this but it is slow, 691be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // even slower than C code in some cases 692be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = SkDiv255Round_neon8(vres_r); 693be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = SkDiv255Round_neon8(vres_g); 694be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = SkDiv255Round_neon8(vres_b); 695be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#else 696be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_r = vrshrq_n_u16(vres_r, 8); 697be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_g = vrshrq_n_u16(vres_g, 8); 698be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vrshrq_n_u16(vres_b, 8); 699be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org#endif 700be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // pack result 701be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, vres_g, SK_G16_SHIFT); // insert green into blue 702be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vres_b = vsliq_n_u16(vres_b, 
vres_r, SK_R16_SHIFT); // insert red into green/blue 703be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org 704be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // store 705be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org vst1q_u16(dst, vres_b); 706be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 8; 707be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org count -= 8; 708be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } while (count >= 8); 709be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 710a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 711be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org // leftovers 712be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org while (count-- > 0) { 713be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org SkPMColor sc = *src++; 714be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org if (sc) { 715be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org uint16_t dc = *dst; 716be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); 7173848427d884b72114854c8eef9662691f23fae7bmtklein unsigned dr = (SkPacked32ToR16(sc) * alpha) + (SkGetPackedR16(dc) * dst_scale); 7183848427d884b72114854c8eef9662691f23fae7bmtklein unsigned dg = (SkPacked32ToG16(sc) * alpha) + (SkGetPackedG16(dc) * dst_scale); 7193848427d884b72114854c8eef9662691f23fae7bmtklein unsigned db = (SkPacked32ToB16(sc) * alpha) + (SkGetPackedB16(dc) * dst_scale); 720be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); 721be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org } 722be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org dst += 1; 723a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 
/* Dither matrix for Neon, derived from gDitherMatrix_3Bit_16.
 *
 * The table holds 4 rows of 12 bytes. Each dither value occupies its own
 * byte lane, and every row is its 4-value pattern written three times, so
 * an 8-byte load can start at offset 0, 1, 2 or 3 from the beginning of a
 * row and still stay inside that row.
 *
 * S32_D565_Blend_Dither_neon picks the row with (y & 3) * 12 and the
 * starting offset with (x & 3).
 */
static const uint8_t gDitherMatrix_Neon[48] = {
    0, 4, 1, 5, 0, 4, 1, 5, 0, 4, 1, 5,
    6, 2, 7, 3, 6, 2, 7, 3, 6, 2, 7, 3,
    1, 5, 0, 4, 1, 5, 0, 4, 1, 5, 0, 4,
    7, 3, 6, 2, 7, 3, 6, 2, 7, 3, 6, 2,

};
SkAlpha255To256(alpha); 747fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7484cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count >= 8) { 7494cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org /* select row and offset for dither array */ 7504cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 751fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7524cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither = vld1_u8(dstart); // load dither values 7534cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. green dither values 754fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7554cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vscale = vdupq_n_s16(scale); // duplicate scale into neon reg 7564cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vmask_b = vdupq_n_u16(0x1F); // set up blue mask 757fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 7584cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 7594cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 760ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 7614cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc_r, vsrc_g, vsrc_b; 7624cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b; 7634cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b; 7644cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b; 7654cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst; 7664cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16x8_t vdst_r, vdst_g, vdst_b; 
7674cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int16x8_t vres_r, vres_g, vres_b; 7684cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int8x8_t vres8_r, vres8_g, vres8_b; 7694cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7704cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load source and add dither 771ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 772ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 773ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 7744cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org { 7754cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d0 asm("d0"); 7764cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d1 asm("d1"); 7774cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d2 asm("d2"); 7784cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org register uint8x8_t d3 asm("d3"); 7794cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7804cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org asm ( 781ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! 
" 7824cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 7834cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org : 7844cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org ); 785ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 786ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 787ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 7884cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 789ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 790ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_r = vsrc.val[NEON_R]; 791ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_g = vsrc.val[NEON_G]; 792ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc_b = vsrc.val[NEON_B]; 7934cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7944cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6 7954cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5 7964cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. 
blue >> 5 7974cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 7984cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen 7994cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vaddl_u8(vsrc_r, vdither); // add in dither to red and widen 8004cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vaddl_u8(vsrc_b, vdither); // add in dither to blue and widen 8014cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8024cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r); // sub shifted red from result 8034cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_g = vsubw_u8(vsrc_dit_g, vsrc565_g); // sub shifted green from result 8044cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_dit_b = vsubw_u8(vsrc_dit_b, vsrc565_b); // sub shifted blue from result 8054cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8064cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_r = vshrq_n_u16(vsrc_dit_r, 3); 8074cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_g = vshrq_n_u16(vsrc_dit_g, 2); 8084cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vsrc_res_b = vshrq_n_u16(vsrc_dit_b, 3); 8094cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8104cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Load dst and unpack 8114cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst = vld1q_u16(dst); 8124cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_g = vshrq_n_u16(vdst, 5); // shift down to get green 8134cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_r = vshrq_n_u16(vshlq_n_u16(vdst, 5), 5+5); // double shift to extract red 8144cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vdst_b = 
vandq_u16(vdst, vmask_b); // mask to get blue 8154cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8164cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // subtract dst from src and widen 8174cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_r), vreinterpretq_s16_u16(vdst_r)); 8184cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_g), vreinterpretq_s16_u16(vdst_g)); 8194cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_b), vreinterpretq_s16_u16(vdst_b)); 8204cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8214cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // multiply diffs by scale and shift 8224cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vmulq_s16(vres_r, vscale); 8234cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vmulq_s16(vres_g, vscale); 8244cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vmulq_s16(vres_b, vscale); 8254cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8264cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_r = vshrn_n_s16(vres_r, 8); 8274cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_g = vshrn_n_s16(vres_g, 8); 8284cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres8_b = vshrn_n_s16(vres_b, 8); 8294cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8304cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // add dst to result 8314cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_r = vaddw_s8(vreinterpretq_s16_u16(vdst_r), vres8_r); 8324cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_g = vaddw_s8(vreinterpretq_s16_u16(vdst_g), vres8_g); 8334cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 
vres_b = vaddw_s8(vreinterpretq_s16_u16(vdst_b), vres8_b); 8344cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8354cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // put result into 565 format 8364cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_g, 5); // shift up green and insert into blue 8374cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vres_b = vsliq_n_s16(vres_b, vres_r, 6+5); // shift up red and insert into blue 8384cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8394cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Store result 8404cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org vst1q_u16(dst, vreinterpretq_u16_s16(vres_b)); 8414cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8424cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Next iteration 8434cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org dst += 8; 8444cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org count -= 8; 8454cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8464cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (count >= 8); 8474cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } 8484cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8494cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org // Leftovers 8504cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org if (count > 0) { 8514cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int scale = SkAlpha255To256(alpha); 8524cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_565_SCAN(y); 8534cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org do { 8544cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColor c = *src++; 8554cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkPMColorAssert(c); 
8564cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8574cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int dither = DITHER_VALUE(x); 8584cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sr = SkGetPackedR32(c); 8594cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sg = SkGetPackedG32(c); 8604cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org int sb = SkGetPackedB32(c); 8614cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sr = SkDITHER_R32To565(sr, dither); 8624cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sg = SkDITHER_G32To565(sg, dither); 8634cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org sb = SkDITHER_B32To565(sb, dither); 8644cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org 8654cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org uint16_t d = *dst; 8664cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale), 8674cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sg, SkGetPackedG16(d), scale), 8684cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org SkAlphaBlend(sb, SkGetPackedB16(d), scale)); 8694cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org DITHER_INC_X(x); 8704cc26324e3be5258fae9dc102aa6a3af7d1c96eacommit-bot@chromium.org } while (--count != 0); 871a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 872a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 873a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 874a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/* Neon version of S32_Blend_BlitRow32() 875a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com * portable version is in src/core/SkBlitRow_D32.cpp 876a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com */ 877a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_Blend_BlitRow32_neon(SkPMColor* 
SK_RESTRICT dst, 878a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 879a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha) { 880a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(alpha <= 255); 881fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 882374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org if (count <= 0) { 883374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org return; 884374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org } 885dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 886374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16_t src_scale = SkAlpha255To256(alpha); 887374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16_t dst_scale = 256 - src_scale; 888dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 889374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org while (count >= 2) { 890374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint8x8_t vsrc, vdst, vres; 891374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16x8_t vsrc_wide, vdst_wide; 892dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 893374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org /* These commented prefetches are a big win for count 894374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org * values > 64 on an A9 (Pandaboard) but hurt by 10% for count = 4. 
895374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org * They also hurt a little (<5%) on an A15 896374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org */ 897374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org //__builtin_prefetch(src+32); 898374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org //__builtin_prefetch(dst+32); 899dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 900374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Load 901374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 902374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 903374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 904374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process src 905374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 906374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); 907374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 908374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process dst 909374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); 910374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 911374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Combine 91240254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_wide += vsrc_wide; 91340254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vres = vshrn_n_u16(vdst_wide, 8); 914374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 915374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Store 916374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 917374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 
918374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org src += 2; 919374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org dst += 2; 920374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org count -= 2; 921fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 922fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 923fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com if (count == 1) { 924374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 925374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org uint16x8_t vsrc_wide, vdst_wide; 926dfff2737f8ad3e945a4dcbe175380d4b2a91a260commit-bot@chromium.org 927374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Load 928374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 929374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 930374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 931374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Process 932374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 933374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale)); 934374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale)); 93540254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_wide += vsrc_wide; 93640254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vres = vshrn_n_u16(vdst_wide, 8); 937374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org 938374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org // Store 939374ea4ee26b9d537c1b9635544105f915766f61bcommit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 
940a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 941a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 942a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 9433a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 9441fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.orgvoid S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, 9451fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org const SkPMColor* SK_RESTRICT src, 9461fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org int count, U8CPU alpha) { 9471fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 94840254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman SkASSERT(255 > alpha); 9491fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9501fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count <= 0) { 9511fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org return; 9521fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 9531fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9541fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned alpha256 = SkAlpha255To256(alpha); 9551fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9561fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // First deal with odd counts 9571fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count & 1) { 9581fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres; 9591fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide; 9601fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org unsigned dst_scale; 9611fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9621fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 9631fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = 
vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); 9641fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); 9651fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9661fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 9671fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst_scale = vget_lane_u8(vsrc, 3); 96840254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman dst_scale = SkAlphaMulInv256(dst_scale, alpha256); 9691fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9701fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 9711fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 9721fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); 9731fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9741fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 9751fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 9761fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmulq_n_u16(vdst_wide, dst_scale); 9771fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9781fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 97940254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_wide += vsrc_wide; 98040254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vres = vshrn_n_u16(vdst_wide, 8); 9811fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9821fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); 9831fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst++; 9841fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src++; 9851fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 
count--; 9861fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 9871fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org if (count) { 9891fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t alpha_mask; 9901fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; 9911fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org alpha_mask = vld1_u8(alpha_mask_setup); 9921fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9931fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org do { 9941fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9951fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint8x8_t vsrc, vdst, vres, vsrc_alphas; 9961fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; 9971fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 9981fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(src+32); 9991fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org __builtin_prefetch(dst+32); 10001fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10011fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Load 10021fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc = vreinterpret_u8_u32(vld1_u32(src)); 10031fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst = vreinterpret_u8_u32(vld1_u32(dst)); 10041fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10051fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Prepare src_scale 10061fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_scale = vdupq_n_u16(alpha256); 10071fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 
10081fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Calc dst_scale 10091fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); 10101fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_scale = vmovl_u8(vsrc_alphas); 101140254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman // Calculate SkAlphaMulInv256(vdst_scale, vsrc_scale). 101240254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman // A 16-bit lane would overflow if we used 0xFFFF here, 101340254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman // so use an approximation with 0xFF00 that is off by 1, 101440254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman // and add back 1 after to get the correct value. 101540254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman // This is valid if alpha256 <= 255. 101640254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_scale = vmlsq_u16(vdupq_n_u16(0xFF00), vdst_scale, vsrc_scale); 101740254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_scale = vsraq_n_u16(vdst_scale, vdst_scale, 8); 101840254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_scale = vsraq_n_u16(vdupq_n_u16(1), vdst_scale, 8); 10191fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10201fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process src 10211fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide = vmovl_u8(vsrc); 10221fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vsrc_wide *= vsrc_scale; 10231fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10241fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Process dst 10251fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide = vmovl_u8(vdst); 10261fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vdst_wide *= vdst_scale; 10271fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10281fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org // Combine 
102940254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vdst_wide += vsrc_wide; 103040254c2c2dc28a34f96294d5a1ad94a99b0be8a6lsalzman vres = vshrn_n_u16(vdst_wide, 8); 10311fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10321fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org vst1_u32(dst, vreinterpret_u32_u8(vres)); 10331fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 10341fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org src += 2; 10351fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org dst += 2; 10361fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org count -= 2; 10371fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } while(count); 10381fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org } 10391fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org} 10401fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org 1041a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1042a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1043ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM32 1044a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1045a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst, 1046a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1047a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1048a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1049a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1050fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1051a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1052a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 
1053fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1054fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org uint8x8_t dbase; 1055fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1056fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dbase = vld1_u8(dstart); 1057a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1058a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1059ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1060fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t sr, sg, sb, sa, d; 1061fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8, scale8, alpha8; 1062fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst_r, dst_g, dst_b; 1063fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1064ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1065ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_4(src); 1066ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1067fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1068fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1069fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1070fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1071fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1072a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1073ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit asm ("vld4.8 {d0-d3},[%[src]]! 
" 1074fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src) 1075fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org : 1076fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org ); 1077ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1078ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1079ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1080ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[3] = d3; 1081fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1082ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1083ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sa = vsrc.val[NEON_A]; 1084ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1085ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1086ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1087a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1088fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org /* calculate 'd', which will be 0..7 1089fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * dbase[] is 0..7; alpha is 0..256; 16 bits suffice 1090fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1091fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmovl_u8(dbase); 1092fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org alpha8 = vmlal_u8(alpha8, sa, dbase); 1093fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org d = vshrn_n_u16(alpha8, 8); // narrowing too 1094fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1095fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sr = sr - (sr>>5) + d 1096fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* watching for 8-bit overflow. 
d is 0..7; risky range of 1097fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * sr is >248; and then (sr>>5) is 7 so it offsets 'd'; 1098fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org * safe as long as we do ((sr-sr>>5) + d) 1099fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org */ 1100fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1101fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vadd_u8(sr, d); 1102fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1103fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sb = sb - (sb>>5) + d 1104fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1105fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vadd_u8(sb, d); 1106fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1107fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1108fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1109fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vadd_u8(sg, vshr_n_u8(d,1)); 1110fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1111fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // need to pick up 8 dst's -- at 16 bits each, 128 bits 1112fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vld1q_u16(dst); 1113fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_b = vandq_u16(dst8, vdupq_n_u16(SK_B16_MASK)); 1114fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_g = vshrq_n_u16(vshlq_n_u16(dst8, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS); 1115fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst_r = vshrq_n_u16(dst8, SK_R16_SHIFT); // clearing hi bits 1116fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org 
1117fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // blend 1118fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vsubw_u8(vdupq_n_u16(256), sa); 1119a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1120fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // combine the addq and mul, save 3 insns 1121fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com scale8 = vshrq_n_u16(scale8, 3); 1122fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_b = vmlaq_u16(vshll_n_u8(sb,2), dst_b, scale8); 1123fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8); 1124fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8); 1125a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1126fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // repack to store 1127fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst8 = vshrq_n_u16(dst_b, 5); 1128fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5); 1129fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11); 1130fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1131fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1132fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1133fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org dst += UNROLL; 1134fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 1135fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // skip x += UNROLL, since it's unchanged mod-4 1136a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (count >= UNROLL); 1137a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1138fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 
1139a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1140fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org // residuals 1141a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1142a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1143a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1144a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1145a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1146a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (c) { 1147a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned a = SkGetPackedA32(c); 1148fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1149a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither and alpha are just temporary variables to work-around 1150a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // an ICE in debug. 1151a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1152a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned alpha = SkAlpha255To256(a); 1153a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int d = SkAlphaMul(dither, alpha); 1154fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1155a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sr = SkGetPackedR32(c); 1156a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sg = SkGetPackedG32(c); 1157a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned sb = SkGetPackedB32(c); 1158a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sr = SkDITHER_R32_FOR_565(sr, d); 1159a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sg = SkDITHER_G32_FOR_565(sg, d); 1160a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com sb = SkDITHER_B32_FOR_565(sb, d); 1161fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
1162a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); 1163a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1164a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1165a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // now src and dst expanded are in g:11 r:10 x:1 b:10 1166a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1167a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1168a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com dst += 1; 1169a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1170a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1171a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1172a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1173a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1174a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1175a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1176a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst, 1177a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com const SkPMColor* SK_RESTRICT src, 1178a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com int count, U8CPU alpha, int x, int y) { 1179a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(255 == alpha); 1180a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1181fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define UNROLL 8 1182a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count >= UNROLL) { 1183fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint8x8_t d; 
1184fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)]; 1185fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com d = vld1_u8(dstart); 1186fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1187fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com while (count >= UNROLL) { 1188efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint8x8_t sr, sg, sb; 1189efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com uint16x8_t dr, dg, db; 1190fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com uint16x8_t dst8; 1191ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit uint8x8x4_t vsrc; 1192fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1193ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64 1194ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc = sk_vld4_u8_arm64_3(src); 1195ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else 1196fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com { 1197fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d0 asm("d0"); 1198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d1 asm("d1"); 1199fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d2 asm("d2"); 1200fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com register uint8x8_t d3 asm("d3"); 1201fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1202688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org asm ( 1203ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit "vld4.8 {d0-d3},[%[src]]! 
" 1204688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src) 1205688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org : 1206688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org ); 1207ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[0] = d0; 1208ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[1] = d1; 1209ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit vsrc.val[2] = d2; 1210fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1211ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif 1212ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sr = vsrc.val[NEON_R]; 1213ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sg = vsrc.val[NEON_G]; 1214ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit sb = vsrc.val[NEON_B]; 1215ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit 1216fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com /* XXX: if we want to prefetch, hide it in the above asm() 1217fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * using the gcc __builtin_prefetch(), the prefetch will 1218fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * fall to the bottom of the loop -- it won't stick up 1219fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com * at the top of the loop, just after the vld4. 
1220fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com */ 1221fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1222688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sr = sr - (sr>>5) + d 1223fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sr = vsub_u8(sr, vshr_n_u8(sr, 5)); 1224fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dr = vaddl_u8(sr, d); 1225fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1226688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sb = sb - (sb>>5) + d 1227fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sb = vsub_u8(sb, vshr_n_u8(sb, 5)); 1228fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com db = vaddl_u8(sb, d); 1229fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1230688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // sg = sg - (sg>>6) + d>>1; similar logic for overflows 1231fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com sg = vsub_u8(sg, vshr_n_u8(sg, 6)); 1232688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dg = vaddl_u8(sg, vshr_n_u8(d, 1)); 1233fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1234688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // pack high bits of each into 565 format (rgb, b is lsb) 1235fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vshrq_n_u16(db, 3); 1236fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5); 1237688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11); 1238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1239688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // store it 1240fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com vst1q_u16(dst, dst8); 1241fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 1242fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com 
dst += UNROLL; 1243688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // we don't need to increment src as the asm above has already done it 1244fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com count -= UNROLL; 1245688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org x += UNROLL; // probably superfluous 1246fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com } 1247a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1248fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef UNROLL 1249a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1250688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org // residuals 1251a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com if (count > 0) { 1252a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_565_SCAN(y); 1253a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com do { 1254a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColor c = *src++; 1255a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkPMColorAssert(c); 1256a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com SkASSERT(SkGetPackedA32(c) == 255); 1257a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1258a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com unsigned dither = DITHER_VALUE(x); 1259a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com *dst++ = SkDitherRGB32To565(c, dither); 1260a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com DITHER_INC_X(x); 1261a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } while (--count != 0); 1262a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com } 1263a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com} 1264a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1265a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com/////////////////////////////////////////////////////////////////////////////// 1266a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 
1267a7f11918d92621507f35b228a290f05dcaf0f4b6reedconst SkBlitRow::Proc16 sk_blitrow_platform_565_procs_arm_neon[] = { 1268a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // no dither 12690060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org S32_D565_Opaque_neon, 127095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org S32_D565_Blend_neon, 1271a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_neon, 1272fa115bd4543631244f3b9accb3541b28f4222a96mtklein#if 0 1273a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Blend_neon, 1274fa115bd4543631244f3b9accb3541b28f4222a96mtklein#else 127596fcdcc219d2a0d3579719b84b28bede76efba64halcanary nullptr, // https://code.google.com/p/skia/issues/detail?id=2797 12765b2c2c6fd09752641b14766678d62fe50b4e3ef3reed#endif 1277a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1278a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com // dither 1279a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Opaque_Dither_neon, 1280a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32_D565_Blend_Dither_neon, 1281a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com S32A_D565_Opaque_Dither_neon, 128296fcdcc219d2a0d3579719b84b28bede76efba64halcanary nullptr, // S32A_D565_Blend_Dither 1283a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1284a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com 1285402448d6818cab9d7b7633a0c18fcf574c915357mleeconst SkBlitRow::ColorProc16 sk_blitrow_platform_565_colorprocs_arm_neon[] = { 1286402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32_D565, 1287402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32A_D565, 1288402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32_D565_Dither, 1289402448d6818cab9d7b7633a0c18fcf574c915357mlee Color32A_D565_neon, // Color32A_D565_Dither 1290402448d6818cab9d7b7633a0c18fcf574c915357mlee}; 
1291402448d6818cab9d7b7633a0c18fcf574c915357mlee 1292a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = { 129396fcdcc219d2a0d3579719b84b28bede76efba64halcanary nullptr, // S32_Opaque, 1294fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com S32_Blend_BlitRow32_neon, // S32_Blend, 1295b4a7dc99b1a01cdd5c0cd5913b630436ca696210mtklein nullptr, // Ported to SkOpts 12963a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32 12971fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org S32A_Blend_BlitRow32_neon // S32A_Blend 1298866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else 129996fcdcc219d2a0d3579719b84b28bede76efba64halcanary nullptr 1300866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#endif 1301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}; 1302