/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
8dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org#include "SkBlitMask.h"
9dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org#include "SkColor_opts_neon.h"
10dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
11dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.orgvoid SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
12dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org                                        SkColor color, int width,
13dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org                                        SkPMColor opaqueDst) {
14dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colR = SkColorGetR(color);
15dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colG = SkColorGetG(color);
16dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colB = SkColorGetB(color);
17dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
184284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vcolR = vdup_n_u8(colR);
194284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vcolG = vdup_n_u8(colG);
204284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vcolB = vdup_n_u8(colB);
214284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
224284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
234284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
244284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
25dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
26dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    while (width >= 8) {
27dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint8x8x4_t vdst;
28dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint16x8_t vmask;
29dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint16x8_t vmaskR, vmaskG, vmaskB;
30dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint8x8_t vsel_trans, vsel_opq;
31dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
32dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst = vld4_u8((uint8_t*)dst);
33dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmask = vld1q_u16(src);
34dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
35dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        // Prepare compare masks
36dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
37dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
38dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
39dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        // Get all the color masks on 5 bits
40dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
41dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
42dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org                             SK_B16_BITS + SK_R16_BITS + 1);
43dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
44dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
45dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        // Upscale to 0..32
46dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
47dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
48dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
49dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
50dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
51dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
52dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
53dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
54dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
55dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
56dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
57dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
58dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
59dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
60dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
61dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vst4_u8((uint8_t*)dst, vdst);
62dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
63dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        dst += 8;
64dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        src += 8;
65dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        width -= 8;
66dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    }
67dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
68dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    // Leftovers
69dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    for (int i = 0; i < width; i++) {
70dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
71dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org                                    opaqueDst);
72dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    }
73dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org}
74dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
75dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.orgvoid SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
76dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org                                   SkColor color, int width, SkPMColor) {
77dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colA = SkColorGetA(color);
78dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colR = SkColorGetR(color);
79dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colG = SkColorGetG(color);
80dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    int colB = SkColorGetB(color);
81dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
82dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    colA = SkAlpha255To256(colA);
83dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
844284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint16x8_t vcolA = vdupq_n_u16(colA);
854284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vcolR = vdup_n_u8(colR);
864284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vcolG = vdup_n_u8(colG);
874284613cfe211bfdcf3506f363d382c044ae8b51Kevin Lubick    uint8x8_t vcolB = vdup_n_u8(colB);
88dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
89dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    while (width >= 8) {
90dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint8x8x4_t vdst;
91dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint16x8_t vmask;
92dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        uint16x8_t vmaskR, vmaskG, vmaskB;
93dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
94dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst = vld4_u8((uint8_t*)dst);
95dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmask = vld1q_u16(src);
96dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
97dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        // Get all the color masks on 5 bits
98dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
99dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
100dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org                             SK_B16_BITS + SK_R16_BITS + 1);
101dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
102dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
103dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        // Upscale to 0..32
104dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
105dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
106dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
107dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
108dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
109dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
110dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
111dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
112dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_A] = vdup_n_u8(0xFF);
113dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
114dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
115dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
116dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
117dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        vst4_u8((uint8_t*)dst, vdst);
118dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
119dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        dst += 8;
120dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        src += 8;
121dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        width -= 8;
122dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    }
123dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org
124dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    for (int i = 0; i < width; i++) {
125dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org        dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
126dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org    }
127dbe7f52412f55561dcc3a51fa3df2779c9a368bfcommit-bot@chromium.org}
128dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
129dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang#define LOAD_LANE_16(reg, n) \
130dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    reg = vld1q_lane_u16(device, reg, n); \
131dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    device = (uint16_t*)((char*)device + deviceRB);
132dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
133dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang#define STORE_LANE_16(reg, n) \
134dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    vst1_lane_u16(dst, reg, n); \
135dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    dst = (uint16_t*)((char*)dst + deviceRB);
136dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
137dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhangvoid SkRGB16BlitterBlitV_neon(uint16_t* device,
138dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang                              int height,
139dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang                              size_t deviceRB,
140dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang                              unsigned scale,
141dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang                              uint32_t src32) {
142dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    if (height >= 8)
143dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    {
144dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint16_t* dst = device;
145dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
146dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        // prepare constants
147dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint16x8_t vdev = vdupq_n_u16(0);
148dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint16x8_t vmaskq_g16 = vdupq_n_u16(SK_G16_MASK_IN_PLACE);
149dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint16x8_t vmaskq_ng16 = vdupq_n_u16(~SK_G16_MASK_IN_PLACE);
150dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint32x4_t vsrc32 = vdupq_n_u32(src32);
151dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint32x4_t vscale5 = vdupq_n_u32((uint32_t)scale);
152dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
153dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        while (height >= 8){
154dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 0)
155dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 1)
156dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 2)
157dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 3)
158dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 4)
159dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 5)
160dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 6)
161dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            LOAD_LANE_16(vdev, 7)
162dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
163dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            // Expand_rgb_16
164dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint16x8x2_t vdst = vzipq_u16((vdev & vmaskq_ng16), (vdev & vmaskq_g16));
165dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint32x4_t vdst32_lo = vmulq_u32(vreinterpretq_u32_u16(vdst.val[0]), vscale5);
166dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint32x4_t vdst32_hi = vmulq_u32(vreinterpretq_u32_u16(vdst.val[1]), vscale5);
167dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
168dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            // Compact_rgb_16
169dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            vdst32_lo = vaddq_u32(vdst32_lo, vsrc32);
170dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            vdst32_hi = vaddq_u32(vdst32_hi, vsrc32);
171dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            vdst32_lo = vshrq_n_u32(vdst32_lo, 5);
172dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            vdst32_hi = vshrq_n_u32(vdst32_hi, 5);
173dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
174dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint16x4_t vtmp_lo = vmovn_u32(vdst32_lo) & vget_low_u16(vmaskq_ng16);
175dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint16x4_t vtmp_hi = vshrn_n_u32(vdst32_lo, 16) & vget_low_u16(vmaskq_g16);
176dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint16x4_t vdst16_lo = vorr_u16(vtmp_lo, vtmp_hi);
177dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            vtmp_lo = vmovn_u32(vdst32_hi) & vget_low_u16(vmaskq_ng16);
178dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            vtmp_hi = vshrn_n_u32(vdst32_hi, 16) & vget_low_u16(vmaskq_g16);
179dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            uint16x4_t vdst16_hi = vorr_u16(vtmp_lo, vtmp_hi);
180dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
181dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_lo, 0)
182dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_lo, 1)
183dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_lo, 2)
184dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_lo, 3)
185dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_hi, 0)
186dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_hi, 1)
187dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_hi, 2)
188dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            STORE_LANE_16(vdst16_hi, 3)
189dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang            height -= 8;
190dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        }
191dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    }
192dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    while (height != 0){
193dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        uint32_t dst32 = SkExpand_rgb_16(*device) * scale;
194dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        *device = SkCompact_rgb_16((src32 + dst32) >> 5);
195dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        device = (uint16_t*)((char*)device + deviceRB);
196dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang        height--;
197dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang    }
198dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang}
199dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang
200dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang#undef LOAD_LANE_16
201dc77b3591841bf1e70ed45455490d688e5d4e6f9yang.zhang#undef STORE_LANE_16
202