/*
 * Copyright 2012 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
8a111e492b84c312a6bd5d5d9ef100dca48f4941ddjsollen@google.com#include "SkBlitRow_opts_arm_neon.h"
9a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
10a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitMask.h"
11a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkBlitRow.h"
12a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkColorPriv.h"
13a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkDither.h"
14a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkMathPriv.h"
15a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include "SkUtils.h"
16a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
170060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org#include "SkColor_opts_neon.h"
18a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#include <arm_neon.h>
19a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
20ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64
21ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_3(const SkPMColor* SK_RESTRICT & src) {
22ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    uint8x8x4_t vsrc;
23ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    uint8x8_t vsrc_0, vsrc_1, vsrc_2;
24ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
25ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    asm (
26ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "ld4    {v0.8b - v3.8b}, [%[src]], #32 \t\n"
27ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc0].8b, v0.8b             \t\n"
28ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc1].8b, v1.8b             \t\n"
29ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc2].8b, v2.8b             \t\n"
30ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1),
31ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit          [vsrc2] "=w" (vsrc_2), [src] "+&r" (src)
32ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        : : "v0", "v1", "v2", "v3"
33ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    );
34ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
35ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[0] = vsrc_0;
36ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[1] = vsrc_1;
37ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[2] = vsrc_2;
38ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
39ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    return vsrc;
40ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit}
41ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
42ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petitstatic inline uint8x8x4_t sk_vld4_u8_arm64_4(const SkPMColor* SK_RESTRICT & src) {
43ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    uint8x8x4_t vsrc;
44ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    uint8x8_t vsrc_0, vsrc_1, vsrc_2, vsrc_3;
45ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
46ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    asm (
47ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "ld4    {v0.8b - v3.8b}, [%[src]], #32 \t\n"
48ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc0].8b, v0.8b             \t\n"
49ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc1].8b, v1.8b             \t\n"
50ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc2].8b, v2.8b             \t\n"
51ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        "mov    %[vsrc3].8b, v3.8b             \t\n"
52ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        : [vsrc0] "=w" (vsrc_0), [vsrc1] "=w" (vsrc_1),
53ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit          [vsrc2] "=w" (vsrc_2), [vsrc3] "=w" (vsrc_3),
54ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit          [src] "+&r" (src)
55ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        : : "v0", "v1", "v2", "v3"
56ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    );
57ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
58ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[0] = vsrc_0;
59ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[1] = vsrc_1;
60ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[2] = vsrc_2;
61ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    vsrc.val[3] = vsrc_3;
62ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
63ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    return vsrc;
64ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit}
65ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
66ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
670060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.orgvoid S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
680060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org                           const SkPMColor* SK_RESTRICT src, int count,
690060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org                           U8CPU alpha, int /*x*/, int /*y*/) {
700060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    SkASSERT(255 == alpha);
710060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
720060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    while (count >= 8) {
730060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        uint8x8x4_t vsrc;
740060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        uint16x8_t vdst;
750060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
760060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        // Load
77ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64
78ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc = sk_vld4_u8_arm64_3(src);
79ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else
800060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        vsrc = vld4_u8((uint8_t*)src);
81ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        src += 8;
82ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
830060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
840060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        // Convert src to 565
85bc25dfc798fff225ce65355ecda19d2b85bd0e74commit-bot@chromium.org        vdst = SkPixel32ToPixel16_neon8(vsrc);
860060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
870060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        // Store
880060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        vst1q_u16(dst, vdst);
890060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
900060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        // Prepare next iteration
910060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        dst += 8;
920060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        count -= 8;
930060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    };
940060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
950060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    // Leftovers
960060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    while (count > 0) {
970060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        SkPMColor c = *src++;
980060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        SkPMColorAssert(c);
990060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        *dst = SkPixel32ToPixel16_ToU16(c);
1000060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        dst++;
1010060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org        count--;
1020060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    };
1030060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org}
1040060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org
10595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.orgvoid S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst,
10695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org                          const SkPMColor* SK_RESTRICT src, int count,
10795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org                          U8CPU alpha, int /*x*/, int /*y*/) {
10895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    SkASSERT(255 > alpha);
10995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
11095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    uint16x8_t vmask_blue, vscale;
11195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
11295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    // prepare constants
11395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    vscale = vdupq_n_u16(SkAlpha255To256(alpha));
11495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    vmask_blue = vmovq_n_u16(0x1F);
11595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
11695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    while (count >= 8) {
117ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        uint8x8x4_t vsrc;
11895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        uint16x8_t vdst, vdst_r, vdst_g, vdst_b;
11995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        uint16x8_t vres_r, vres_g, vres_b;
12095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
12195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        // Load src
122ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64
123ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc = sk_vld4_u8_arm64_3(src);
124ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else
12595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        {
12695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        register uint8x8_t d0 asm("d0");
12795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        register uint8x8_t d1 asm("d1");
12895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        register uint8x8_t d2 asm("d2");
12995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        register uint8x8_t d3 asm("d3");
13095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
13195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        asm (
13295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            "vld4.8    {d0-d3},[%[src]]!"
13395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)
13495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            :
13595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        );
136ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[0] = d0;
137ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[1] = d1;
138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[2] = d2;
13995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        }
140ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
14195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
14295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        // Load and unpack dst
14395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vdst = vld1q_u16(dst);
14495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vdst_g = vshlq_n_u16(vdst, 5);        // shift green to top of lanes
14595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vdst_b = vandq_u16(vdst, vmask_blue); // extract blue
14695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vdst_r = vshrq_n_u16(vdst, 6+5);      // extract red
14795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vdst_g = vshrq_n_u16(vdst_g, 5+5);    // extract green
14895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
149ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        // Shift src to 565 range
150ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[NEON_R] = vshr_n_u8(vsrc.val[NEON_R], 3);
151ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[NEON_G] = vshr_n_u8(vsrc.val[NEON_G], 2);
152ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[NEON_B] = vshr_n_u8(vsrc.val[NEON_B], 3);
15395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
15495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        // Scale src - dst
155ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vres_r = vmovl_u8(vsrc.val[NEON_R]) - vdst_r;
156ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vres_g = vmovl_u8(vsrc.val[NEON_G]) - vdst_g;
157ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vres_b = vmovl_u8(vsrc.val[NEON_B]) - vdst_b;
15895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
15995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_r = vshrq_n_u16(vres_r * vscale, 8);
16095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_g = vshrq_n_u16(vres_g * vscale, 8);
16195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_b = vshrq_n_u16(vres_b * vscale, 8);
16295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
16395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_r += vdst_r;
16495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_g += vdst_g;
16595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_b += vdst_b;
16695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
16795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        // Combine
16895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_b = vsliq_n_u16(vres_b, vres_g, 5);    // insert green into blue
16995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vres_b = vsliq_n_u16(vres_b, vres_r, 6+5);  // insert red into green/blue
17095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
17195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        // Store
17295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        vst1q_u16(dst, vres_b);
17395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        dst += 8;
17495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        count -= 8;
17595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    }
17695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    if (count > 0) {
17795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        int scale = SkAlpha255To256(alpha);
17895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        do {
17995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            SkPMColor c = *src++;
18095c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            SkPMColorAssert(c);
18195c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            uint16_t d = *dst;
18295c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org            *dst++ = SkPackRGB16(
18395c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org                    SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale),
18495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org                    SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale),
18595c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org                    SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale));
18695c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org        } while (--count != 0);
18795c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    }
18895c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org}
18995c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org
190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32
191a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
192a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                           const SkPMColor* SK_RESTRICT src, int count,
193a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                           U8CPU alpha, int /*x*/, int /*y*/) {
194a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    SkASSERT(255 == alpha);
195a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
196a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (count >= 8) {
197efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com        uint16_t* SK_RESTRICT keep_dst = 0;
198fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
199a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        asm volatile (
200a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "ands       ip, %[count], #7            \n\t"
201a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmov.u8    d31, #1<<7                  \n\t"
202a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.16    {q12}, [%[dst]]             \n\t"
203a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld4.8     {d0-d3}, [%[src]]           \n\t"
204a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // Thumb does not support the standard ARM conditional
205a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // instructions but instead requires the 'it' instruction
206a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // to signal conditional execution
207a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "it eq                                  \n\t"
208a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "moveq      ip, #8                      \n\t"
209a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "mov        %[keep_dst], %[dst]         \n\t"
210fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
211a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "add        %[src], %[src], ip, LSL#2   \n\t"
212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "add        %[dst], %[dst], ip, LSL#1   \n\t"
213a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "subs       %[count], %[count], ip      \n\t"
214a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "b          9f                          \n\t"
215a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // LOOP
216a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "2:                                         \n\t"
217fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
218a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.16    {q12}, [%[dst]]!            \n\t"
219a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld4.8     {d0-d3}, [%[src]]!          \n\t"
220a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vst1.16    {q10}, [%[keep_dst]]        \n\t"
221a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "sub        %[keep_dst], %[dst], #8*2   \n\t"
222a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "subs       %[count], %[count], #8      \n\t"
223a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "9:                                         \n\t"
224a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "pld        [%[dst],#32]                \n\t"
225a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // expand 0565 q12 to 8888 {d4-d7}
226a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovn.u16  d4, q12                     \n\t"
227a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q11, q12, #5                \n\t"
228a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q10, q12, #6+5              \n\t"
229a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovn.u16  d5, q11                     \n\t"
230a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovn.u16  d6, q10                     \n\t"
231a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshl.u8    d4, d4, #3                  \n\t"
232a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshl.u8    d5, d5, #2                  \n\t"
233a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshl.u8    d6, d6, #3                  \n\t"
234fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
235a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovl.u8   q14, d31                    \n\t"
236a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovl.u8   q13, d31                    \n\t"
237a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovl.u8   q12, d31                    \n\t"
238fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
239a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // duplicate in 4/2/1 & 8pix vsns
240a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmvn.8     d30, d3                     \n\t"
241a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmlal.u8   q14, d30, d6                \n\t"
242a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmlal.u8   q13, d30, d5                \n\t"
243a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmlal.u8   q12, d30, d4                \n\t"
244a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q8, q14, #5                 \n\t"
245a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q9, q13, #6                 \n\t"
246a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vaddhn.u16 d6, q14, q8                 \n\t"
247a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q8, q12, #5                 \n\t"
248a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vaddhn.u16 d5, q13, q9                 \n\t"
249a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vqadd.u8   d6, d6, d0                  \n\t"  // moved up
250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vaddhn.u16 d4, q12, q8                 \n\t"
251a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // intentionally don't calculate alpha
252a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // result in d4-d6
253fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
254a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vqadd.u8   d5, d5, d1                  \n\t"
255a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vqadd.u8   d4, d4, d2                  \n\t"
256fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
257a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // pack 8888 {d4-d6} to 0565 q10
258a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshll.u8   q10, d6, #8                 \n\t"
259a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshll.u8   q3, d5, #8                  \n\t"
260a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshll.u8   q2, d4, #8                  \n\t"
261a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vsri.u16   q10, q3, #5                 \n\t"
262a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vsri.u16   q10, q2, #11                \n\t"
263fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
264a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "bne        2b                          \n\t"
265fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
266a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "1:                                         \n\t"
267a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vst1.16      {q10}, [%[keep_dst]]      \n\t"
268a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      : [count] "+r" (count)
269fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com                      : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src)
270a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
271a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
272a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "d30","d31"
273a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      );
274a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
275fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    else
276a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    {   // handle count < 8
277efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com        uint16_t* SK_RESTRICT keep_dst = 0;
278fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
279a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        asm volatile (
280a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmov.u8    d31, #1<<7                  \n\t"
281a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "mov        %[keep_dst], %[dst]         \n\t"
282fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
283a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "tst        %[count], #4                \n\t"
284a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "beq        14f                         \n\t"
285a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.16    {d25}, [%[dst]]!            \n\t"
286a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.32    {q1}, [%[src]]!             \n\t"
287fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
288a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "14:                                        \n\t"
289a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "tst        %[count], #2                \n\t"
290a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "beq        12f                         \n\t"
291a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.32    {d24[1]}, [%[dst]]!         \n\t"
292a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.32    {d1}, [%[src]]!             \n\t"
293fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
294a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "12:                                        \n\t"
295a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "tst        %[count], #1                \n\t"
296a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "beq        11f                         \n\t"
297a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.16    {d24[1]}, [%[dst]]!         \n\t"
298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vld1.32    {d0[1]}, [%[src]]!          \n\t"
299fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
300a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "11:                                        \n\t"
301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // unzips achieve the same as a vld4 operation
302a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vuzpq.u16  q0, q1                      \n\t"
303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vuzp.u8    d0, d1                      \n\t"
304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vuzp.u8    d2, d3                      \n\t"
305a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // expand 0565 q12 to 8888 {d4-d7}
306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovn.u16  d4, q12                     \n\t"
307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q11, q12, #5                \n\t"
308a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q10, q12, #6+5              \n\t"
309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovn.u16  d5, q11                     \n\t"
310a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovn.u16  d6, q10                     \n\t"
311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshl.u8    d4, d4, #3                  \n\t"
312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshl.u8    d5, d5, #2                  \n\t"
313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshl.u8    d6, d6, #3                  \n\t"
314fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovl.u8   q14, d31                    \n\t"
316a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovl.u8   q13, d31                    \n\t"
317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmovl.u8   q12, d31                    \n\t"
318fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // duplicate in 4/2/1 & 8pix vsns
320a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmvn.8     d30, d3                     \n\t"
321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmlal.u8   q14, d30, d6                \n\t"
322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmlal.u8   q13, d30, d5                \n\t"
323a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vmlal.u8   q12, d30, d4                \n\t"
324a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q8, q14, #5                 \n\t"
325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q9, q13, #6                 \n\t"
326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vaddhn.u16 d6, q14, q8                 \n\t"
327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshr.u16   q8, q12, #5                 \n\t"
328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vaddhn.u16 d5, q13, q9                 \n\t"
329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vqadd.u8   d6, d6, d0                  \n\t"  // moved up
330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vaddhn.u16 d4, q12, q8                 \n\t"
331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // intentionally don't calculate alpha
332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // result in d4-d6
333fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vqadd.u8   d5, d5, d1                  \n\t"
335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vqadd.u8   d4, d4, d2                  \n\t"
336fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // pack 8888 {d4-d6} to 0565 q10
338a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshll.u8   q10, d6, #8                 \n\t"
339a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshll.u8   q3, d5, #8                  \n\t"
340a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vshll.u8   q2, d4, #8                  \n\t"
341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vsri.u16   q10, q3, #5                 \n\t"
342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vsri.u16   q10, q2, #11                \n\t"
343fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
344a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      // store
345a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "tst        %[count], #4                \n\t"
346a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "beq        24f                         \n\t"
347a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vst1.16    {d21}, [%[keep_dst]]!       \n\t"
348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
349a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "24:                                        \n\t"
350a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "tst        %[count], #2                \n\t"
351a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "beq        22f                         \n\t"
352a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vst1.32    {d20[1]}, [%[keep_dst]]!    \n\t"
353fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
354a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "22:                                        \n\t"
355a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "tst        %[count], #1                \n\t"
356a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "beq        21f                         \n\t"
357a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "vst1.16    {d20[1]}, [%[keep_dst]]!    \n\t"
358fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
359a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "21:                                        \n\t"
360a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      : [count] "+r" (count)
361a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      : [dst] "r" (dst), [keep_dst] "r" (keep_dst), [src] "r" (src)
362a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
363a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
364a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      "d30","d31"
365a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      );
366a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
367a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}
368ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
369a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
/* Lane-wise rounded division by 255 for eight unsigned 16-bit values.
 *
 * Each lane p becomes ((p + 128) + ((p + 128) >> 8)) >> 8, the usual
 * add-and-shift replacement for a rounded divide by 255. All arithmetic is
 * done in 16-bit lanes; the visible caller feeds it the product of two 8-bit
 * values (vmull_u8), for which the intermediate sums stay below 65536.
 */
static inline uint16x8_t SkDiv255Round_neon8(uint16x8_t prod) {
    // bias by half a unit so the final shift rounds instead of truncating
    const uint16x8_t biased = prod + vdupq_n_u16(128);
    return vshrq_n_u16(biased + vshrq_n_u16(biased, 8), 8);
}
375be233d63ca015c2991f4fe0802e4a31a71642062commit-bot@chromium.org
/* Blends a row of 32-bit SkPMColor source pixels onto RGB565 destination
 * pixels, with the source additionally scaled by a global alpha.
 *
 * Per channel the result is
 *     (src565 * alpha + dst565 * dst_scale) / 255
 * with dst_scale = 255 - SkDiv255Round(src_alpha * alpha).
 *
 * The Neon loop consumes 8 pixels per iteration. Unless S32A_D565_BLEND_EXACT
 * is defined it replaces the final divide by 255 with a rounding shift by 8,
 * so (per the original author's note) a few results differ from the scalar
 * code, never by more than 1. The remaining pixels use the scalar formula.
 *
 * dst    row of RGB565 pixels, updated in place
 * src    row of source pixels
 * count  number of pixels in the row
 * alpha  global alpha, asserted to be below 255
 * The two trailing int parameters (x, y) are unused.
 */
void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst,
                          const SkPMColor* SK_RESTRICT src, int count,
                          U8CPU alpha, int /*x*/, int /*y*/) {
    SkASSERT(255 > alpha);

    // loop-invariant vectors
    const uint16x8_t v255       = vdupq_n_u16(255);
    const uint8x8_t  vscale     = vdup_n_u8(alpha);
    const uint16x8_t vblue_mask = vdupq_n_u16(SK_B16_MASK);

    for (; count >= 8; count -= 8, dst += 8) {
        uint8x8x4_t vpx;    // 8 source pixels, one byte plane per channel

        // fetch 8 destination pixels
        const uint16x8_t vd = vld1q_u16(dst);

        // fetch 8 source pixels, de-interleaved; every variant post-increments
        // src (the arm64 helper is defined outside this chunk -- presumably it
        // advances src through its reference parameter; confirm there)
#ifdef SK_CPU_ARM64
        vpx = sk_vld4_u8_arm64_4(src);
#else
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
        asm (
            "vld4.u8 %h[vsrc], [%[src]]!"
            : [vsrc] "=w" (vpx), [src] "+&r" (src)
            : :
        );
#else
        // older GCC: pin the four planes to d0-d3 by hand
        register uint8x8_t d0 asm("d0");
        register uint8x8_t d1 asm("d1");
        register uint8x8_t d2 asm("d2");
        register uint8x8_t d3 asm("d3");

        asm volatile (
            "vld4.u8    {d0-d3},[%[src]]!;"
            : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
              [src] "+&r" (src)
            : :
        );
        vpx.val[0] = d0;
        vpx.val[1] = d1;
        vpx.val[2] = d2;
        vpx.val[3] = d3;
#endif
#endif // #ifdef SK_CPU_ARM64

        // split dst into channels: green is isolated by shifting red out of
        // the top and then everything else out of the bottom
        const uint16x8_t vd_g = vshrq_n_u16(vshlq_n_u16(vd, SK_R16_BITS),
                                            SK_R16_BITS + SK_B16_BITS);
        const uint16x8_t vd_b = vandq_u16(vd, vblue_mask);
        const uint16x8_t vd_r = vshrq_n_u16(vd, SK_R16_SHIFT);

        // reduce the 8-bit source colour channels to 565 precision
        vpx.val[NEON_R] = vshr_n_u8(vpx.val[NEON_R], 8 - SK_R16_BITS);
        vpx.val[NEON_G] = vshr_n_u8(vpx.val[NEON_G], 8 - SK_G16_BITS);
        vpx.val[NEON_B] = vshr_n_u8(vpx.val[NEON_B], 8 - SK_B16_BITS);

        // src * alpha, widened to 16 bits
        const uint16x8_t vsa   = vmull_u8(vpx.val[NEON_A], vscale);
        uint16x8_t       vacc_r = vmull_u8(vpx.val[NEON_R], vscale);
        uint16x8_t       vacc_g = vmull_u8(vpx.val[NEON_G], vscale);
        uint16x8_t       vacc_b = vmull_u8(vpx.val[NEON_B], vscale);

        // dst_scale = 255 - (sa * alpha) / 255
        const uint16x8_t vdst_scale = vsubq_u16(v255, SkDiv255Round_neon8(vsa));

        // accumulate dst * dst_scale
        vacc_r = vmlaq_u16(vacc_r, vd_r, vdst_scale);
        vacc_g = vmlaq_u16(vacc_g, vd_g, vdst_scale);
        vacc_b = vmlaq_u16(vacc_b, vd_b, vdst_scale);

#ifdef S32A_D565_BLEND_EXACT
        // Exact divide by 255; noted by the original author as slow, in some
        // cases slower than the C code.
        vacc_r = SkDiv255Round_neon8(vacc_r);
        vacc_g = SkDiv255Round_neon8(vacc_g);
        vacc_b = SkDiv255Round_neon8(vacc_b);
#else
        // approximate /255 with a rounding >> 8
        vacc_r = vrshrq_n_u16(vacc_r, 8);
        vacc_g = vrshrq_n_u16(vacc_g, 8);
        vacc_b = vrshrq_n_u16(vacc_b, 8);
#endif

        // re-assemble 565: blue stays low, green then red are shifted in above
        uint16x8_t vout = vsliq_n_u16(vacc_b, vacc_g, SK_G16_SHIFT);
        vout = vsliq_n_u16(vout, vacc_r, SK_R16_SHIFT);

        vst1q_u16(dst, vout);
    }

    // scalar path for whatever is left (fewer than 8 pixels)
    for (; count > 0; --count, ++src, ++dst) {
        const SkPMColor c = *src;
        if (0 == c) {
            continue;   // all-zero source pixel: destination is left as is
        }

        const uint16_t d = *dst;
        const unsigned dstScale = 255 - SkMulDiv255Round(SkGetPackedA32(c), alpha);
        const unsigned r = SkMulS16(SkPacked32ToR16(c), alpha) + SkMulS16(SkGetPackedR16(d), dstScale);
        const unsigned g = SkMulS16(SkPacked32ToG16(c), alpha) + SkMulS16(SkGetPackedG16(d), dstScale);
        const unsigned b = SkMulS16(SkPacked32ToB16(c), alpha) + SkMulS16(SkGetPackedB16(d), dstScale);
        *dst = SkPackRGB16(SkDiv255Round(r), SkDiv255Round(g), SkDiv255Round(b));
    }
}
493a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
/* Byte-per-entry dither table for the Neon blitters, derived from
 * gDitherMatrix_3Bit_16. There are four rows of 12 bytes. A row is its
 * four-value pattern written out three times, which lets an 8-byte vector
 * load start at column offset 0, 1, 2 or 3 and still stay inside the row.
 */
static const uint8_t gDitherMatrix_Neon[4 * 12] = {
    /* row 0 */  0, 4, 1, 5,   0, 4, 1, 5,   0, 4, 1, 5,
    /* row 1 */  6, 2, 7, 3,   6, 2, 7, 3,   6, 2, 7, 3,
    /* row 2 */  1, 5, 0, 4,   1, 5, 0, 4,   1, 5, 0, 4,
    /* row 3 */  7, 3, 6, 2,   7, 3, 6, 2,   7, 3, 6, 2,
};
506a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
/* Blends a row of 32-bit source pixels onto RGB565 destination pixels with a
 * global alpha, dithering the source down to 565 precision first.
 *
 * Per channel: dst + (((dithered_src565 - dst) * scale) >> 8), where
 * scale = SkAlpha255To256(alpha). Only the R, G and B bytes of each source
 * pixel are used; the source alpha byte is ignored.
 *
 * dst    row of RGB565 pixels, updated in place
 * src    row of source pixels
 * count  number of pixels in the row
 * alpha  global alpha, asserted to be below 255
 * x, y   position of the first pixel; selects the dither column and row
 *
 * The Neon loop handles 8 pixels per iteration, the scalar loop the rest.
 */
void S32_D565_Blend_Dither_neon(uint16_t *dst, const SkPMColor *src,
                                int count, U8CPU alpha, int x, int y)
{
    SkASSERT(255 > alpha);

    // rescale alpha to range 1 - 256
    int scale = SkAlpha255To256(alpha);

    if (count >= 8) {
        /* select row and offset for dither array: 12 bytes per row, and the
         * 8 bytes starting at column (x&3) hold the pattern for 8 pixels */
        const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)];

        uint8x8_t vdither = vld1_u8(dstart);         // load dither values
        uint8x8_t vdither_g = vshr_n_u8(vdither, 1); // calc. green dither values

        int16x8_t vscale = vdupq_n_s16(scale);        // duplicate scale into neon reg
        uint16x8_t vmask_b = vdupq_n_u16(0x1F);         // set up blue mask

        // The same vdither is reused for every iteration: each one advances
        // by 8 pixels, a multiple of the table's 4-column period.
        do {

            uint8x8x4_t vsrc;
            uint8x8_t vsrc_r, vsrc_g, vsrc_b;
            uint8x8_t vsrc565_r, vsrc565_g, vsrc565_b;
            uint16x8_t vsrc_dit_r, vsrc_dit_g, vsrc_dit_b;
            uint16x8_t vsrc_res_r, vsrc_res_g, vsrc_res_b;
            uint16x8_t vdst;
            uint16x8_t vdst_r, vdst_g, vdst_b;
            int16x8_t vres_r, vres_g, vres_b;
            int8x8_t vres8_r, vres8_g, vres8_b;

            // Load source and add dither. Both variants advance src by 8
            // pixels; the fourth (alpha) plane is never read below.
#ifdef SK_CPU_ARM64
            vsrc = sk_vld4_u8_arm64_3(src);
#else
            {
            register uint8x8_t d0 asm("d0");
            register uint8x8_t d1 asm("d1");
            register uint8x8_t d2 asm("d2");
            register uint8x8_t d3 asm("d3");

            asm (
                "vld4.8    {d0-d3},[%[src]]! "
                : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)
                :
            );
            vsrc.val[0] = d0;
            vsrc.val[1] = d1;
            vsrc.val[2] = d2;
            }
#endif
            vsrc_r = vsrc.val[NEON_R];
            vsrc_g = vsrc.val[NEON_G];
            vsrc_b = vsrc.val[NEON_B];

            vsrc565_g = vshr_n_u8(vsrc_g, 6); // calc. green >> 6
            vsrc565_r = vshr_n_u8(vsrc_r, 5); // calc. red >> 5
            vsrc565_b = vshr_n_u8(vsrc_b, 5); // calc. blue >> 5

            vsrc_dit_g = vaddl_u8(vsrc_g, vdither_g); // add in dither to green and widen
            vsrc_dit_r = vaddl_u8(vsrc_r, vdither);   // add in dither to red and widen
            vsrc_dit_b = vaddl_u8(vsrc_b, vdither);   // add in dither to blue and widen

            vsrc_dit_r = vsubw_u8(vsrc_dit_r, vsrc565_r);  // sub shifted red from result
            vsrc_dit_g = vsubw_u8(vsrc_dit_g, vsrc565_g);  // sub shifted green from result
            vsrc_dit_b = vsubw_u8(vsrc_dit_b, vsrc565_b);  // sub shifted blue from result

            // drop to 5/6/5 bits
            vsrc_res_r = vshrq_n_u16(vsrc_dit_r, 3);
            vsrc_res_g = vshrq_n_u16(vsrc_dit_g, 2);
            vsrc_res_b = vshrq_n_u16(vsrc_dit_b, 3);

            // Load dst and unpack. The field positions must mirror the packing
            // further down (green inserted at bit 5, red at bit 11): red is the
            // top 5 bits, green the 6 bits below it, blue the low 5 bits.
            // Extracting red as (dst << 5) >> 10 or green as dst >> 5 would
            // blend red against dst's green and leave red bits in the green
            // lanes.
            vdst = vld1q_u16(dst);
            vdst_r = vshrq_n_u16(vdst, 6+5);                 // shift down to get red
            vdst_g = vshrq_n_u16(vshlq_n_u16(vdst, 5), 5+5); // double shift to extract green
            vdst_b = vandq_u16(vdst, vmask_b);               // mask to get blue

            // subtract dst from src and widen
            vres_r = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_r), vreinterpretq_s16_u16(vdst_r));
            vres_g = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_g), vreinterpretq_s16_u16(vdst_g));
            vres_b = vsubq_s16(vreinterpretq_s16_u16(vsrc_res_b), vreinterpretq_s16_u16(vdst_b));

            // multiply diffs by scale and shift
            vres_r = vmulq_s16(vres_r, vscale);
            vres_g = vmulq_s16(vres_g, vscale);
            vres_b = vmulq_s16(vres_b, vscale);

            vres8_r = vshrn_n_s16(vres_r, 8);
            vres8_g = vshrn_n_s16(vres_g, 8);
            vres8_b = vshrn_n_s16(vres_b, 8);

            // add dst to result
            vres_r = vaddw_s8(vreinterpretq_s16_u16(vdst_r), vres8_r);
            vres_g = vaddw_s8(vreinterpretq_s16_u16(vdst_g), vres8_g);
            vres_b = vaddw_s8(vreinterpretq_s16_u16(vdst_b), vres8_b);

            // put result into 565 format
            vres_b = vsliq_n_s16(vres_b, vres_g, 5);   // shift up green and insert into blue
            vres_b = vsliq_n_s16(vres_b, vres_r, 6+5); // shift up red and insert into blue

            // Store result
            vst1q_u16(dst, vreinterpretq_u16_s16(vres_b));

            // Next iteration
            dst += 8;
            count -= 8;

        } while (count >= 8);
    }

    // Leftovers (reuses the scale computed at the top of the function)
    if (count > 0) {
        DITHER_565_SCAN(y);
        // NOTE(review): x was not advanced by the vector loop; this relies on
        // DITHER_VALUE(x) having the same 4-column period -- confirm in SkDither.h
        do {
            SkPMColor c = *src++;
            SkPMColorAssert(c);

            int dither = DITHER_VALUE(x);
            int sr = SkGetPackedR32(c);
            int sg = SkGetPackedG32(c);
            int sb = SkGetPackedB32(c);
            sr = SkDITHER_R32To565(sr, dither);
            sg = SkDITHER_G32To565(sg, dither);
            sb = SkDITHER_B32To565(sb, dither);

            uint16_t d = *dst;
            *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), scale),
                                 SkAlphaBlend(sg, SkGetPackedG16(d), scale),
                                 SkAlphaBlend(sb, SkGetPackedB16(d), scale));
            DITHER_INC_X(x);
        } while (--count != 0);
    }
}
641a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
/* Source-over blend of two adjacent 32-bit pixels held in one 64-bit NEON
 * register. For every byte lane:
 *     result = src + ((dst * (256 - src_alpha)) >> 8)
 * where src_alpha is looked up from the pixel the lane belongs to, using the
 * byte indices in alpha_idx.
 */
static inline uint8x8_t S32A_Opaque_blend_pair_neon(uint8x8_t src_px,
                                                    uint8x8_t dst_px,
                                                    uint8x8_t alpha_idx) {
    /* replicate each pixel's alpha byte across that pixel's four lanes */
    const uint8x8_t src_alpha = vtbl1_u8(src_px, alpha_idx);

    /* 256 - alpha, widened to 16 bits per lane */
    const uint16x8_t inv_alpha = vsubw_u8(vdupq_n_u16(256), src_alpha);

    /* widen the destination, scale it, then narrow back with a >> 8 */
    const uint16x8_t dst_scaled = vmulq_u16(vmovl_u8(dst_px), inv_alpha);

    /* plain (non-saturating) byte-lane add; lane overflow is ignored */
    return vadd_u8(src_px, vshrn_n_u16(dst_scaled, 8));
}

/* Blend a row of src pixels over dst (source-over), four pixels per NEON
 * iteration, finishing any remaining pixels with SkPMSrcOver().
 *
 * dst   - destination row, updated in place
 * src   - source row
 * count - number of pixels; nothing is done when count <= 0
 * alpha - global alpha, asserted to be 255
 */
void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
                                const SkPMColor* SK_RESTRICT src,
                                int count, U8CPU alpha) {
    SkASSERT(255 == alpha);

    if (count <= 0) {
        return;
    }

    /* table indices selecting byte 3 (first pixel) and byte 7 (second pixel) */
    static const uint8_t alpha_idx_setup[] = {3,3,3,3,7,7,7,7};
    const uint8x8_t alpha_idx = vld1_u8(alpha_idx_setup);

    enum { kPixelsPerIter = 4 };

    /* NEON path: two registers of two pixels each per iteration */
    for (; count >= kPixelsPerIter; count -= kPixelsPerIter) {
        /* The two prefetches below may make the code slightly slower for
         * small values of count but are worth having in the general case.
         */
        __builtin_prefetch(src + 32);
        __builtin_prefetch(dst + 32);

        /* load all four source and destination pixels before storing */
        const uint8x8_t src_lo = vreinterpret_u8_u32(vld1_u32(src));
        const uint8x8_t src_hi = vreinterpret_u8_u32(vld1_u32(src + 2));
        const uint8x8_t dst_lo = vreinterpret_u8_u32(vld1_u32(dst));
        const uint8x8_t dst_hi = vreinterpret_u8_u32(vld1_u32(dst + 2));

        const uint8x8_t out_lo = S32A_Opaque_blend_pair_neon(src_lo, dst_lo, alpha_idx);
        const uint8x8_t out_hi = S32A_Opaque_blend_pair_neon(src_hi, dst_hi, alpha_idx);

        vst1_u32(dst,     vreinterpret_u32_u8(out_lo));
        vst1_u32(dst + 2, vreinterpret_u32_u8(out_hi));

        src += kPixelsPerIter;
        dst += kPixelsPerIter;
    }

    /* scalar path for the 0..3 pixels that are left */
    for (; count > 0; --count) {
        *dst = SkPMSrcOver(*src, *dst);
        ++src;
        ++dst;
    }
}
744a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
/* Source-over blit of a row of 32-bit pixels that adapts to the source alpha.
 *
 * The most significant byte of each source pixel is treated as its alpha.
 * The row is walked by a small state machine with three fast states:
 *   ALPHA_0        - runs of pixels with alpha 0 are skipped (dst untouched)
 *   ALPHA_255      - runs of pixels with alpha 255 are copied to dst
 *   ALPHA_1_TO_254 - everything else is blended with NEON, 4 pixels at a time
 * The fast states read a few pixels ahead of the ones they consume, so they
 * are only used while more than UNROLL + 1 pixels remain; the rest of the row
 * is finished by the scalar TAIL loop.
 *
 * dst   - destination row, updated in place
 * src   - source row
 * count - number of pixels; nothing is done when count <= 0
 * alpha - global alpha, asserted to be 255
 */
void S32A_Opaque_BlitRow32_neon_src_alpha(SkPMColor* SK_RESTRICT dst,
                                const SkPMColor* SK_RESTRICT src,
                                int count, U8CPU alpha) {
    SkASSERT(255 == alpha);

    if (count <= 0) {
        return;
    }

    /* Use these to check if src is transparent or opaque */
    const unsigned int ALPHA_OPAQ  = 0xFF000000;
    const unsigned int ALPHA_TRANS = 0x00FFFFFF;

#define UNROLL  4
/* True when UNROLL + 1 or fewer pixels remain, i.e. the fast states must hand
 * over to TAIL. Expressed as a distance to the real end of the row so that no
 * pointer before the start of src is ever formed, even for count < UNROLL + 1.
 */
#define SRC_NEAR_END()  ((src_stop - src) <= (UNROLL + 1))

    const SkPMColor* const src_stop = src + count;  /* one past the last pixel */
    const SkPMColor* src_temp = src;

    /* set up the NEON variables */
    /* table indices selecting byte 3 (first pixel) and byte 7 (second pixel) */
    static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
    const uint8x8_t alpha_mask = vld1_u8(alpha_mask_setup);

    /* declared without initialisers because the gotos below jump past them */
    uint8x8_t src_raw, dst_raw, dst_final;
    uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
    uint8x8_t dst_cooked;
    uint16x8_t dst_wide;
    uint8x8_t alpha_narrow;
    uint16x8_t alpha_wide;

    /* choose the first processing type */
    if (SRC_NEAR_END()) {
        goto TAIL;
    }
    if (*src <= ALPHA_TRANS) {
        goto ALPHA_0;
    }
    if (*src >= ALPHA_OPAQ) {
        goto ALPHA_255;
    }
    /* fall-thru */

ALPHA_1_TO_254:
    do {

        /* get the source */
        src_raw = vreinterpret_u8_u32(vld1_u32(src));
        src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2));

        /* get and hold the dst too */
        dst_raw = vreinterpret_u8_u32(vld1_u32(dst));
        dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2));

        /* --- pixels 0 and 1 --- */

        /* get the alphas spread out properly */
        alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
        /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
        /* we collapsed (255-a)+1 ... */
        alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);

        /* spread the dest */
        dst_wide = vmovl_u8(dst_raw);

        /* alpha mul the dest */
        dst_wide = vmulq_u16(dst_wide, alpha_wide);
        dst_cooked = vshrn_n_u16(dst_wide, 8);

        /* sum -- ignoring any byte lane overflows */
        dst_final = vadd_u8(src_raw, dst_cooked);

        /* --- pixels 2 and 3, same steps --- */

        alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
        alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);

        dst_wide = vmovl_u8(dst_raw_2);

        dst_wide = vmulq_u16(dst_wide, alpha_wide);
        dst_cooked = vshrn_n_u16(dst_wide, 8);

        dst_final_2 = vadd_u8(src_raw_2, dst_cooked);

        vst1_u32(dst, vreinterpret_u32_u8(dst_final));
        vst1_u32(dst+2, vreinterpret_u32_u8(dst_final_2));

        src += UNROLL;
        dst += UNROLL;

        /* if 2 of the next pixels aren't between 1 and 254
        it might make sense to go to the optimized loops.
        This peek at src[0] and src[1] is why the fast states stop
        UNROLL + 1 pixels before the end rather than UNROLL. */
        if ((src[0] <= ALPHA_TRANS && src[1] <= ALPHA_TRANS) ||
            (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ)) {
            break;
        }

    } while (!SRC_NEAR_END());

    if (SRC_NEAR_END()) {
        goto TAIL;
    }

    if (src[0] >= ALPHA_OPAQ && src[1] >= ALPHA_OPAQ) {
        goto ALPHA_255;
    }

    /*fall-thru*/

ALPHA_0:

    /*In this state, we know the current alpha is 0 and
     we optimize for the next alpha also being zero. */
    src_temp = src;  //so we don't have to increment dst every time
    do {
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
        if (*(++src) > ALPHA_TRANS) {
            break;
        }
    } while (!SRC_NEAR_END());

    /* skipped pixels leave dst untouched; just catch dst up with src */
    dst += (src - src_temp);

    /* no longer alpha 0, so determine where to go next. */
    if (SRC_NEAR_END()) {
        goto TAIL;
    }
    if (*src >= ALPHA_OPAQ) {
        goto ALPHA_255;
    } else {
        goto ALPHA_1_TO_254;
    }

ALPHA_255:
    /* the AND of four pixels is >= ALPHA_OPAQ only if all four alphas are 255 */
    while ((src[0] & src[1] & src[2] & src[3]) >= ALPHA_OPAQ) {
        dst[0] = src[0];
        dst[1] = src[1];
        dst[2] = src[2];
        dst[3] = src[3];
        src += UNROLL;
        dst += UNROLL;
        if (SRC_NEAR_END()) {
            goto TAIL;
        }
    }

    //Handle remainder: copy up to three leading opaque pixels.
    if (*src >= ALPHA_OPAQ) { *dst++ = *src++;
        if (*src >= ALPHA_OPAQ) { *dst++ = *src++;
            if (*src >= ALPHA_OPAQ) { *dst++ = *src++; }
        }
    }

    if (SRC_NEAR_END()) {
        goto TAIL;
    }
    if (*src <= ALPHA_TRANS) {
        goto ALPHA_0;
    } else {
        goto ALPHA_1_TO_254;
    }

TAIL:
    /* do any residual iterations, one pixel at a time up to the real end */
    while (src != src_stop) {
        if (*src != 0) {
            if (*src >= ALPHA_OPAQ) {
                *dst = *src;
            } else {
                *dst = SkPMSrcOver(*src, *dst);
            }
        }
        src++;
        dst++;
    }

#undef    SRC_NEAR_END
#undef    UNROLL
    return;
}
917a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
/* Blend one 64-bit register of source bytes with one of destination bytes
 * using constant scales. For every byte lane:
 *     result = ((dst * dst_scale) >> 8) + ((src * src_scale) >> 8)
 * The source product is formed in 16-bit lanes (vsrc_scale is 16 bits wide),
 * the destination product with a widening 8-bit multiply.
 */
static inline uint8x8_t S32_Blend_pair_neon(uint8x8_t vsrc, uint8x8_t vdst,
                                            uint16x8_t vsrc_scale,
                                            uint8x8_t vdst_scale) {
    const uint16x8_t src_scaled = vmulq_u16(vmovl_u8(vsrc), vsrc_scale);
    const uint16x8_t dst_scaled = vmull_u8(vdst, vdst_scale);

    return vadd_u8(vshrn_n_u16(dst_scaled, 8), vshrn_n_u16(src_scaled, 8));
}

/* Neon version of S32_Blend_BlitRow32()
 * portable version is in src/core/SkBlitRow_D32.cpp
 *
 * Blends src into dst with a constant alpha, two pixels per iteration, then
 * handles a final odd pixel through a single-lane load/store.
 *
 * dst   - destination row, updated in place
 * src   - source row
 * count - number of pixels; nothing is done when count <= 0
 * alpha - global alpha, asserted to be <= 255
 */
void S32_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
                              const SkPMColor* SK_RESTRICT src,
                              int count, U8CPU alpha) {
    SkASSERT(alpha <= 255);

    if (count <= 0) {
        return;
    }

    const uint16_t src_scale = SkAlpha255To256(alpha);
    const uint16_t dst_scale = 256 - src_scale;

    /* the scales do not change across the row, so broadcast them once */
    const uint16x8_t vsrc_scale = vdupq_n_u16(src_scale);
    const uint8x8_t  vdst_scale = vdup_n_u8(dst_scale);

    /* Prefetching src+32 / dst+32 here was tried: a big win for count
     * values > 64 on an A9 (Pandaboard) but it hurts by 10% for count = 4,
     * and also hurts a little (<5%) on an A15, so it is left out.
     */
    for (; count >= 2; count -= 2) {
        const uint8x8_t vsrc = vreinterpret_u8_u32(vld1_u32(src));
        const uint8x8_t vdst = vreinterpret_u8_u32(vld1_u32(dst));

        const uint8x8_t vres = S32_Blend_pair_neon(vsrc, vdst, vsrc_scale, vdst_scale);
        vst1_u32(dst, vreinterpret_u32_u8(vres));

        src += 2;
        dst += 2;
    }

    if (count == 1) {
        /* load the last pixel into lane 0 of a zeroed register */
        const uint32x2_t vzero = vreinterpret_u32_u8(vdup_n_u8(0));
        const uint8x8_t vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vzero, 0));
        const uint8x8_t vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vzero, 0));

        const uint8x8_t vres = S32_Blend_pair_neon(vsrc, vdst, vsrc_scale, vdst_scale);

        /* only lane 0 holds a real pixel, so only lane 0 is written back */
        vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);
    }
}
984a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
9853a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32
9861fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.orgvoid S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
9871fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org                         const SkPMColor* SK_RESTRICT src,
9881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org                         int count, U8CPU alpha) {
9891fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
9901fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    SkASSERT(255 >= alpha);
9911fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
9921fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    if (count <= 0) {
9931fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        return;
9941fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    }
9951fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
9961fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    unsigned alpha256 = SkAlpha255To256(alpha);
9971fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
9981fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    // First deal with odd counts
9991fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    if (count & 1) {
10001fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres;
10011fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        uint16x8_t vdst_wide, vsrc_wide;
10021fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        unsigned dst_scale;
10031fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10041fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        // Load
10051fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));
10061fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));
10071fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10081fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        // Calc dst_scale
10091fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        dst_scale = vget_lane_u8(vsrc, 3);
10101fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        dst_scale *= alpha256;
10111fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        dst_scale >>= 8;
10121fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        dst_scale = 256 - dst_scale;
10131fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10141fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        // Process src
10151fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vsrc_wide = vmovl_u8(vsrc);
10161fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256);
10171fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10181fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        // Process dst
10191fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vdst_wide = vmovl_u8(vdst);
10201fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vdst_wide = vmulq_n_u16(vdst_wide, dst_scale);
10211fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10221fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        // Combine
10231fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);
10241fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10251fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);
10261fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        dst++;
10271fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        src++;
10281fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        count--;
10291fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    }
10301fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10311fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    if (count) {
10321fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        uint8x8_t alpha_mask;
10331fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};
10341fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        alpha_mask = vld1_u8(alpha_mask_setup);
10351fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10361fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        do {
10371fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10381fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            uint8x8_t vsrc, vdst, vres, vsrc_alphas;
10391fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale;
10401fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10411fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            __builtin_prefetch(src+32);
10421fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            __builtin_prefetch(dst+32);
10431fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10441fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            // Load
10451fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vsrc = vreinterpret_u8_u32(vld1_u32(src));
10461fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst = vreinterpret_u8_u32(vld1_u32(dst));
10471fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10481fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            // Prepare src_scale
10491fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vsrc_scale = vdupq_n_u16(alpha256);
10501fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10511fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            // Calc dst_scale
10521fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vsrc_alphas = vtbl1_u8(vsrc, alpha_mask);
10531fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst_scale = vmovl_u8(vsrc_alphas);
10541fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst_scale *= vsrc_scale;
10551fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst_scale = vshrq_n_u16(vdst_scale, 8);
10561fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale);
10571fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10581fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            // Process src
10591fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vsrc_wide = vmovl_u8(vsrc);
10601fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vsrc_wide *= vsrc_scale;
10611fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10621fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            // Process dst
10631fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst_wide = vmovl_u8(vdst);
10641fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vdst_wide *= vdst_scale;
10651fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10661fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            // Combine
10671fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);
10681fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10691fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            vst1_u32(dst, vreinterpret_u32_u8(vres));
10701fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
10711fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            src += 2;
10721fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            dst += 2;
10731fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org            count -= 2;
10741fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org        } while(count);
10751fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    }
10761fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org}
10771fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org
1078a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com///////////////////////////////////////////////////////////////////////////////
1079a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1080fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef    DEBUG_OPAQUE_DITHER
1081a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1082fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if    defined(DEBUG_OPAQUE_DITHER)
1083a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme8(char *str, void *p, int len)
1084a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{
1085fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    static char buf[256];
1086fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    char tbuf[32];
1087fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int i;
1088fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    char *pc = (char*) p;
1089fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    sprintf(buf,"%8s:", str);
1090fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    for(i=0;i<len;i++) {
1091fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sprintf(tbuf, "   %02x", pc[i]);
1092fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        strcat(buf, tbuf);
1093fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    }
1094fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    SkDebugf("%s\n", buf);
1095a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}
1096a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comstatic void showme16(char *str, void *p, int len)
1097a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com{
1098fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    static char buf[256];
1099fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    char tbuf[32];
1100fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int i;
1101fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    uint16_t *pc = (uint16_t*) p;
1102fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    sprintf(buf,"%8s:", str);
1103fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    len = (len / sizeof(uint16_t));    /* passed as bytes */
1104fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    for(i=0;i<len;i++) {
1105fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sprintf(tbuf, " %04x", pc[i]);
1106fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        strcat(buf, tbuf);
1107fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    }
1108fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    SkDebugf("%s\n", buf);
1109a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}
1110a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif
1111ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif // #ifdef SK_CPU_ARM32
1112a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1113a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32A_D565_Opaque_Dither_neon (uint16_t * SK_RESTRICT dst,
1114a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                                   const SkPMColor* SK_RESTRICT src,
1115a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                                   int count, U8CPU alpha, int x, int y) {
1116a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    SkASSERT(255 == alpha);
1117a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1118fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define    UNROLL    8
1119a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1120a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (count >= UNROLL) {
1121fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1122fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER)
1123fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    uint16_t tmpbuf[UNROLL];
1124fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int td[UNROLL];
1125fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int tdv[UNROLL];
1126fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int ta[UNROLL];
1127fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int tap[UNROLL];
1128fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    uint16_t in_dst[UNROLL];
1129fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int offset = 0;
1130fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    int noisy = 0;
1131a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif
1132a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1133fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org    uint8x8_t dbase;
1134fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)];
1135fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    dbase = vld1_u8(dstart);
1136a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1137a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        do {
1138ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        uint8x8x4_t vsrc;
1139fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        uint8x8_t sr, sg, sb, sa, d;
1140fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        uint16x8_t dst8, scale8, alpha8;
1141fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        uint16x8_t dst_r, dst_g, dst_b;
1142fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1143fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER)
1144fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // calculate 8 elements worth into a temp buffer
1145fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        {
1146fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        int my_y = y;
1147fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        int my_x = x;
1148fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        SkPMColor* my_src = (SkPMColor*)src;
1149fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        uint16_t* my_dst = dst;
1150fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        int i;
1151fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org
1152fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        DITHER_565_SCAN(my_y);
1153fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        for(i = 0; i < UNROLL; i++) {
1154a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkPMColor c = *my_src++;
1155a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkPMColorAssert(c);
1156a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            if (c) {
1157a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned a = SkGetPackedA32(c);
1158fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1159a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                int d = SkAlphaMul(DITHER_VALUE(my_x), SkAlpha255To256(a));
1160fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                tdv[i] = DITHER_VALUE(my_x);
1161fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                ta[i] = a;
1162fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                tap[i] = SkAlpha255To256(a);
1163fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                td[i] = d;
1164fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1165a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned sr = SkGetPackedR32(c);
1166a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned sg = SkGetPackedG32(c);
1167a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned sb = SkGetPackedB32(c);
1168a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                sr = SkDITHER_R32_FOR_565(sr, d);
1169a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                sg = SkDITHER_G32_FOR_565(sg, d);
1170a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                sb = SkDITHER_B32_FOR_565(sb, d);
1171fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1172a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
1173a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                uint32_t dst_expanded = SkExpand_rgb_16(*my_dst);
1174a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1175a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                // now src and dst expanded are in g:11 r:10 x:1 b:10
1176a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                tmpbuf[i] = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1177fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                td[i] = d;
1178a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            } else {
1179fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                tmpbuf[i] = *my_dst;
1180fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                ta[i] = tdv[i] = td[i] = 0xbeef;
1181fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            }
1182fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            in_dst[i] = *my_dst;
1183a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            my_dst += 1;
1184a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            DITHER_INC_X(my_x);
1185fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        }
1186fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        }
1187a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif
1188a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1189ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64
1190ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc = sk_vld4_u8_arm64_4(src);
1191ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else
1192fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        {
1193fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d0 asm("d0");
1194fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d1 asm("d1");
1195fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d2 asm("d2");
1196fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d3 asm("d3");
1197a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1198ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        asm ("vld4.8    {d0-d3},[%[src]]! "
1199fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+r" (src)
1200fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            :
1201fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        );
1202ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[0] = d0;
1203ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[1] = d1;
1204ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[2] = d2;
1205ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[3] = d3;
1206fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        }
1207ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
1208ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sa = vsrc.val[NEON_A];
1209ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sr = vsrc.val[NEON_R];
1210ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sg = vsrc.val[NEON_G];
1211ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sb = vsrc.val[NEON_B];
1212a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1213fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        /* calculate 'd', which will be 0..7
1214fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org         * dbase[] is 0..7; alpha is 0..256; 16 bits suffice
1215fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org         */
1216fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        alpha8 = vmovl_u8(dbase);
1217fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        alpha8 = vmlal_u8(alpha8, sa, dbase);
1218fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        d = vshrn_n_u16(alpha8, 8);    // narrowing too
1219fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1220fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // sr = sr - (sr>>5) + d
1221fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        /* watching for 8-bit overflow.  d is 0..7; risky range of
1222fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com         * sr is >248; and then (sr>>5) is 7 so it offsets 'd';
1223fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org         * safe  as long as we do ((sr-sr>>5) + d)
1224fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org         */
1225fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sr = vsub_u8(sr, vshr_n_u8(sr, 5));
1226fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sr = vadd_u8(sr, d);
1227fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1228fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // sb = sb - (sb>>5) + d
1229fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sb = vsub_u8(sb, vshr_n_u8(sb, 5));
1230fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sb = vadd_u8(sb, d);
1231fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1232fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // sg = sg - (sg>>6) + d>>1; similar logic for overflows
1233fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sg = vsub_u8(sg, vshr_n_u8(sg, 6));
1234fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sg = vadd_u8(sg, vshr_n_u8(d,1));
1235fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1236fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // need to pick up 8 dst's -- at 16 bits each, 128 bits
1237fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst8 = vld1q_u16(dst);
1238fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        dst_b = vandq_u16(dst8, vdupq_n_u16(SK_B16_MASK));
1239fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        dst_g = vshrq_n_u16(vshlq_n_u16(dst8, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS);
1240fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        dst_r = vshrq_n_u16(dst8, SK_R16_SHIFT);    // clearing hi bits
1241fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org
1242fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // blend
1243fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        scale8 = vsubw_u8(vdupq_n_u16(256), sa);
1244a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1245fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // combine the addq and mul, save 3 insns
1246fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        scale8 = vshrq_n_u16(scale8, 3);
1247fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst_b = vmlaq_u16(vshll_n_u8(sb,2), dst_b, scale8);
1248fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst_g = vmlaq_u16(vshll_n_u8(sg,3), dst_g, scale8);
1249fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst_r = vmlaq_u16(vshll_n_u8(sr,2), dst_r, scale8);
1250a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1251fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // repack to store
1252fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        dst8 = vshrq_n_u16(dst_b, 5);
1253fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_g, 5), 5);
1254fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dst_r,5), 11);
1255fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1256fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        vst1q_u16(dst, dst8);
1257fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1258fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#if defined(DEBUG_OPAQUE_DITHER)
1259fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // verify my 8 elements match the temp buffer
1260fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        {
1261fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        int i, bad=0;
1262fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        static int invocation;
1263a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1264fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        for (i = 0; i < UNROLL; i++) {
1265fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            if (tmpbuf[i] != dst[i]) {
1266fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                bad=1;
1267fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            }
1268fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        }
1269fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        if (bad) {
1270fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            SkDebugf("BAD S32A_D565_Opaque_Dither_neon(); invocation %d offset %d\n",
1271fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                     invocation, offset);
1272fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            SkDebugf("  alpha 0x%x\n", alpha);
1273fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            for (i = 0; i < UNROLL; i++)
1274fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                SkDebugf("%2d: %s %04x w %04x id %04x s %08x d %04x %04x %04x %04x\n",
1275fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                         i, ((tmpbuf[i] != dst[i])?"BAD":"got"), dst[i], tmpbuf[i],
1276fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org                         in_dst[i], src[i-8], td[i], tdv[i], tap[i], ta[i]);
1277fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org
1278fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme16("alpha8", &alpha8, sizeof(alpha8));
1279fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme16("scale8", &scale8, sizeof(scale8));
1280fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme8("d", &d, sizeof(d));
1281fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme16("dst8", &dst8, sizeof(dst8));
1282fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme16("dst_b", &dst_b, sizeof(dst_b));
1283fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme16("dst_g", &dst_g, sizeof(dst_g));
1284fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme16("dst_r", &dst_r, sizeof(dst_r));
1285fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme8("sb", &sb, sizeof(sb));
1286fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme8("sg", &sg, sizeof(sg));
1287fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            showme8("sr", &sr, sizeof(sr));
1288fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org
1289fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org            return;
1290fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        }
1291fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        offset += UNROLL;
1292fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        invocation++;
1293fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        }
1294fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org#endif
1295fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        dst += UNROLL;
1296fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        count -= UNROLL;
1297fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org        // skip x += UNROLL, since it's unchanged mod-4
1298a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        } while (count >= UNROLL);
1299a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1300fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef    UNROLL
1301a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1302fe68eb6a4081f60caf665ec632180e6d7c26a169commit-bot@chromium.org    // residuals
1303a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (count > 0) {
1304a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        DITHER_565_SCAN(y);
1305a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        do {
1306a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkPMColor c = *src++;
1307a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkPMColorAssert(c);
1308a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            if (c) {
1309a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned a = SkGetPackedA32(c);
1310fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1311a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                // dither and alpha are just temporary variables to work-around
1312a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                // an ICE in debug.
1313a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned dither = DITHER_VALUE(x);
1314a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned alpha = SkAlpha255To256(a);
1315a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                int d = SkAlphaMul(dither, alpha);
1316fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1317a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned sr = SkGetPackedR32(c);
1318a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned sg = SkGetPackedG32(c);
1319a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                unsigned sb = SkGetPackedB32(c);
1320a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                sr = SkDITHER_R32_FOR_565(sr, d);
1321a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                sg = SkDITHER_G32_FOR_565(sg, d);
1322a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                sb = SkDITHER_B32_FOR_565(sb, d);
1323fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1324a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
1325a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1326a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1327a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                // now src and dst expanded are in g:11 r:10 x:1 b:10
1328a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1329a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            }
1330a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            dst += 1;
1331a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            DITHER_INC_X(x);
1332a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        } while (--count != 0);
1333a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1334a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}
1335a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1336a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com///////////////////////////////////////////////////////////////////////////////
1337a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1338fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef    DEBUG_S32_OPAQUE_DITHER
1339a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1340a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
1341a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                                 const SkPMColor* SK_RESTRICT src,
1342a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                                 int count, U8CPU alpha, int x, int y) {
1343a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    SkASSERT(255 == alpha);
1344a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1345fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#define    UNROLL    8
1346a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (count >= UNROLL) {
1347fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    uint8x8_t d;
1348fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    const uint8_t *dstart = &gDitherMatrix_Neon[(y&3)*12 + (x&3)];
1349fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    d = vld1_u8(dstart);
1350fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1351fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    while (count >= UNROLL) {
1352efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com        uint8x8_t sr, sg, sb;
1353efbe8e9bedda21a3e061ebf3d96431a0f250a654djsollen@google.com        uint16x8_t dr, dg, db;
1354fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        uint16x8_t dst8;
1355ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        uint8x8x4_t vsrc;
1356fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1357ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM64
1358ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc = sk_vld4_u8_arm64_3(src);
1359ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else
1360fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        {
1361fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d0 asm("d0");
1362fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d1 asm("d1");
1363fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d2 asm("d2");
1364fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        register uint8x8_t d3 asm("d3");
1365fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1366688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        asm (
1367ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit            "vld4.8    {d0-d3},[%[src]]! "
1368688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org            : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)
1369688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org            :
1370688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        );
1371ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[0] = d0;
1372ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[1] = d1;
1373ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        vsrc.val[2] = d2;
1374fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        }
1375ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
1376ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sr = vsrc.val[NEON_R];
1377ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sg = vsrc.val[NEON_G];
1378ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit        sb = vsrc.val[NEON_B];
1379ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit
1380fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        /* XXX: if we want to prefetch, hide it in the above asm()
1381fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com         * using the gcc __builtin_prefetch(), the prefetch will
1382fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com         * fall to the bottom of the loop -- it won't stick up
1383fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com         * at the top of the loop, just after the vld4.
1384fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com         */
1385fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1386688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // sr = sr - (sr>>5) + d
1387fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sr = vsub_u8(sr, vshr_n_u8(sr, 5));
1388fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dr = vaddl_u8(sr, d);
1389fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1390688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // sb = sb - (sb>>5) + d
1391fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sb = vsub_u8(sb, vshr_n_u8(sb, 5));
1392fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        db = vaddl_u8(sb, d);
1393fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1394688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // sg = sg - (sg>>6) + d>>1; similar logic for overflows
1395fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        sg = vsub_u8(sg, vshr_n_u8(sg, 6));
1396688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        dg = vaddl_u8(sg, vshr_n_u8(d, 1));
1397fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1398688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // pack high bits of each into 565 format  (rgb, b is lsb)
1399fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst8 = vshrq_n_u16(db, 3);
1400fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dg, 2), 5);
1401688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        dst8 = vsliq_n_u16(dst8, vshrq_n_u16(dr, 3), 11);
1402fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1403688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // store it
1404fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        vst1q_u16(dst, dst8);
1405fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com
1406fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#if    defined(DEBUG_S32_OPAQUE_DITHER)
1407688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // always good to know if we generated good results
1408fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        {
1409fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        int i, myx = x, myy = y;
1410fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        DITHER_565_SCAN(myy);
1411fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        for (i=0;i<UNROLL;i++) {
1412688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org            // the '!' in the asm block above post-incremented src by the 8 pixels it reads.
1413688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org            SkPMColor c = src[i-8];
1414fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com            unsigned dither = DITHER_VALUE(myx);
1415fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com            uint16_t val = SkDitherRGB32To565(c, dither);
1416fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com            if (val != dst[i]) {
1417fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com            SkDebugf("RBE: src %08x dither %02x, want %04x got %04x dbas[i] %02x\n",
1418fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com                c, dither, val, dst[i], dstart[i]);
1419fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com            }
1420fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com            DITHER_INC_X(myx);
1421fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        }
1422fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        }
1423a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com#endif
1424a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1425fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        dst += UNROLL;
1426688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        // we don't need to increment src as the asm above has already done it
1427fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com        count -= UNROLL;
1428688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org        x += UNROLL;        // probably superfluous
1429fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    }
1430a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1431fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com#undef    UNROLL
1432a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1433688d362b4545e1beadebb7ba5886813d7038883ccommit-bot@chromium.org    // residuals
1434a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (count > 0) {
1435a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        DITHER_565_SCAN(y);
1436a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        do {
1437a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkPMColor c = *src++;
1438a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkPMColorAssert(c);
1439a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            SkASSERT(SkGetPackedA32(c) == 255);
1440a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1441a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            unsigned dither = DITHER_VALUE(x);
1442a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            *dst++ = SkDitherRGB32To565(c, dither);
1443a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            DITHER_INC_X(x);
1444a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        } while (--count != 0);
1445a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1446a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}
1447a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1448a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comvoid Color32_arm_neon(SkPMColor* dst, const SkPMColor* src, int count,
1449a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com                      SkPMColor color) {
1450a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (count <= 0) {
1451a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        return;
1452a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1453a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1454a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (0 == color) {
1455a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        if (src != dst) {
1456a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com            memcpy(dst, src, count * sizeof(SkPMColor));
1457a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        }
1458a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        return;
1459a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1460a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1461a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    unsigned colorA = SkGetPackedA32(color);
1462a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    if (255 == colorA) {
1463a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com        sk_memset32(dst, color, count);
14645376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        return;
14655376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org    }
1466a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
14675376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org    unsigned scale = 256 - SkAlpha255To256(colorA);
1468a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
14695376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org    if (count >= 8) {
14705376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        uint32x4_t vcolor;
14715376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        uint8x8_t vscale;
14725376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
14735376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        vcolor = vdupq_n_u32(color);
14745376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
14755376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        // scale numerical interval [0-255], so load as 8 bits
14765376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        vscale = vdup_n_u8(scale);
14775376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
14785376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        do {
14795376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // load src color, 8 pixels, 4 64 bit registers
14805376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // (and increment src).
14815376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            uint32x2x4_t vsrc;
14823a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)))
14835376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            asm (
14845376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                "vld1.32    %h[vsrc], [%[src]]!"
14855376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                : [vsrc] "=w" (vsrc), [src] "+r" (src)
14865376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                : :
14875376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            );
1488866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang
14895376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vsrc.val[0] = vld1_u32(src);
14905376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vsrc.val[1] = vld1_u32(src+2);
14915376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vsrc.val[2] = vld1_u32(src+4);
14925376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vsrc.val[3] = vld1_u32(src+6);
14935376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            src += 8;
14945376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif
14955376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
14965376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // multiply long by scale, 64 bits at a time,
14975376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // destination into a 128 bit register.
14985376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            uint16x8x4_t vtmp;
14995376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vtmp.val[0] = vmull_u8(vreinterpret_u8_u32(vsrc.val[0]), vscale);
15005376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vtmp.val[1] = vmull_u8(vreinterpret_u8_u32(vsrc.val[1]), vscale);
15015376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vtmp.val[2] = vmull_u8(vreinterpret_u8_u32(vsrc.val[2]), vscale);
15025376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vtmp.val[3] = vmull_u8(vreinterpret_u8_u32(vsrc.val[3]), vscale);
15035376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
15045376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // shift the 128 bit registers, containing the 16
15055376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // bit scaled values back to 8 bits, narrowing the
15065376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // results to 64 bit registers.
15075376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            uint8x16x2_t vres;
15085376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vres.val[0] = vcombine_u8(
15095376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                            vshrn_n_u16(vtmp.val[0], 8),
15105376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                            vshrn_n_u16(vtmp.val[1], 8));
15115376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vres.val[1] = vcombine_u8(
15125376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                            vshrn_n_u16(vtmp.val[2], 8),
15135376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                            vshrn_n_u16(vtmp.val[3], 8));
15145376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
15155376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // adding back the color, using 128 bit registers.
15165376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            uint32x4x2_t vdst;
15175376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vdst.val[0] = vreinterpretq_u32_u8(vres.val[0] +
15185376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                                               vreinterpretq_u8_u32(vcolor));
15195376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vdst.val[1] = vreinterpretq_u32_u8(vres.val[1] +
15205376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                                               vreinterpretq_u8_u32(vcolor));
15215376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
15225376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // store back the 8 calculated pixels (2 128 bit
15235376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            // registers), and increment dst.
15243a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#if defined(SK_CPU_ARM32) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)))
15255376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            asm (
15265376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                "vst1.32    %h[vdst], [%[dst]]!"
15275376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                : [dst] "+r" (dst)
15285376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                : [vdst] "w" (vdst)
15295376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org                : "memory"
15305376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            );
1531866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else // 64bit targets and Clang
15325376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vst1q_u32(dst, vdst.val[0]);
15335376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            vst1q_u32(dst+4, vdst.val[1]);
15345376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            dst += 8;
15355376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org#endif
15365376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org            count -= 8;
15375376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
15385376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        } while (count >= 8);
15395376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org    }
15405376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org
15415376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org    while (count > 0) {
15425376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        *dst = color + SkAlphaMulQ(*src, scale);
15435376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        src += 1;
15445376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        dst += 1;
15455376325c7c6a2ba42d2713587bda6c76ea1bd7d7commit-bot@chromium.org        count--;
1546a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    }
1547a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com}
1548a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1549a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com///////////////////////////////////////////////////////////////////////////////
1550a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1551a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {
1552a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    // no dither
15530060159457453ca45a47828648c8f29d5695983ccommit-bot@chromium.org    S32_D565_Opaque_neon,
155495c2e5532b094add82b007bdfcd4c64050b6b366commit-bot@chromium.org    S32_D565_Blend_neon,
1555ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#ifdef SK_CPU_ARM32
1556a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    S32A_D565_Opaque_neon,
1557ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#else
1558ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit    NULL,
1559ea6b46b6c06fd9e03b98f01b274733de1eeae89dkevin.petit#endif
1560a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    S32A_D565_Blend_neon,
1561a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1562a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    // dither
1563a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    S32_D565_Opaque_Dither_neon,
1564a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    S32_D565_Blend_Dither_neon,
1565a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    S32A_D565_Opaque_Dither_neon,
1566a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    NULL,   // S32A_D565_Blend_Dither
1567a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com};
1568a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com
1569a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.comconst SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {
1570a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com    NULL,   // S32_Opaque,
1571fbfcd5602128ec010c82cb733c9cdc0a3254f9f3rmistry@google.com    S32_Blend_BlitRow32_neon,        // S32_Blend,
1572c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com    /*
1573c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * We have two choices for S32A_Opaque procs. The one reads the src alpha
1574c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * value and attempts to optimize accordingly.  The optimization is
1575c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * sensitive to the source content and is not a win in all cases. For
1576c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * example, if there are a lot of transitions between the alpha states,
1577c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * the performance will almost certainly be worse.  However, for many
1578c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * common cases the performance is equivalent or better than the standard
1579c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     * case where we do not inspect the src alpha.
1580c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com     */
1581c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#if SK_A32_SHIFT == 24
1582c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com    // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor
1583c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com    S32A_Opaque_BlitRow32_neon_src_alpha,   // S32A_Opaque,
1584c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#else
1585c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com    S32A_Opaque_BlitRow32_neon,     // S32A_Opaque,
1586c2532dd0b89e03ed158229872cb1ee06ae7f10fedjsollen@google.com#endif
15873a2682a77f996f649de7699c9f7bee046c6d4f17mtklein#ifdef SK_CPU_ARM32
15881fdc6774280ffc18dd7e1247e430931aa2f58790commit-bot@chromium.org    S32A_Blend_BlitRow32_neon        // S32A_Blend
1589866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#else
1590866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit    NULL
1591866b95d65dfc01af372bbed206ec067e04c1f533kevin.petit#endif
1592a8dd1ce930811a51cc841f583424d507d95e7e78digit@google.com};
1593