193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com/*
2b0c97975894a5eebebf9d93147cdd941a3accb63fbarchard@google.com *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com *
493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com *  Use of this source code is governed by a BSD-style license
593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com *  that can be found in the LICENSE file in the root of the source
693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com *  tree. An additional intellectual property rights grant can be found
7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com *  in the file PATENTS. All contributing project authors may
893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com *  be found in the AUTHORS file in the root of the source tree.
993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com */
1093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
11142f6c4ed5eaeec0176f255e64bac8d8c70b42e1fbarchard@google.com#include "libyuv/row.h"
1293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
13fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus
14fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comnamespace libyuv {
15fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comextern "C" {
16fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif
17fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com
182d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com// This module is for GCC Neon
191b9df4c5c85ca9ff161249a8ffbaeda2f5edf5dffbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
202d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com
214807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y, 4 U and 4 V from 422
// Asm fragment: loads 8 Y bytes from [%0] into d0, 4 U bytes from [%1] into
// the low 32 bits of d2 and 4 V bytes from [%2] into the high 32 bits of d2.
// All three pointers are post-incremented ("!").
// MEMACCESS is defined outside this view.
224807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READYUV422                                                             \
230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"                             \
250bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)                                                               \
262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.32    {d2[0]}, [%1]!                 \n"                             \
270bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)                                                               \
282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.32    {d2[1]}, [%2]!                 \n"
294807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
30b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com// Read 8 Y, 2 U and 2 V from 411
// Asm fragment: loads 8 Y bytes from [%0] into d0, 2 U bytes from [%1] and
// 2 V bytes from [%2] into the first two 16-bit lanes of d2, then copies d2
// to d3 and zips them byte-wise so every U and V byte is duplicated.
// Result: d2 = U0 U0 U1 U1 V0 V0 V1 V1, the same 4 U + 4 V layout that
// READYUV422 produces. d3 is used as scratch.
31b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com#define READYUV411                                                             \
320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"                             \
340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)                                                               \
352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.16    {d2[0]}, [%1]!                 \n"                             \
360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)                                                               \
372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.16    {d2[1]}, [%2]!                 \n"                             \
38b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d3, d2                         \n"                             \
39b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vzip.u8    d2, d3                         \n"
40b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
41b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com// Read 8 Y, 8 U and 8 V from 444
// Asm fragment: loads 8 Y bytes from [%0] into d0, 8 U bytes from [%1] into
// d2 and 8 V bytes from [%2] into d3. Adjacent U pairs and adjacent V pairs
// are then summed (vpaddl) and halved with rounding (vrshrn #1), leaving
// 4 averaged U bytes in the low half of d2 and 4 averaged V bytes in the
// high half, i.e. the layout READYUV422 produces. d3 is used as scratch.
42b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com#define READYUV444                                                             \
430bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
442c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"                             \
450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)                                                               \
462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%1]!                    \n"                             \
470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)                                                               \
482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%2]!                    \n"                             \
49b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"                             \
50b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vrshrn.u16 d2, q1, #1                     \n"
51b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
5200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com// Read 8 Y, and set 4 U and 4 V to 128
// Asm fragment: loads 8 Y bytes from [%0] into d0 (post-incrementing %0) and
// fills every byte of d2 with 128. No chroma is read from memory.
5300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com#define READYUV400                                                             \
540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
552c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"                             \
5600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d2, #128                       \n"
5700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com
584807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y and 4 UV from NV12
// Asm fragment: loads 8 Y bytes from [%0] into d0 and 8 interleaved chroma
// bytes (U0 V0 U1 V1 ...) from [%1] into d2; both pointers are
// post-incremented. The copy + vuzp separates even (U) and odd (V) bytes,
// and vtrn.u32 then packs them so d2 = 4 U bytes (low half) followed by
// 4 V bytes (high half). d3 is used as scratch.
594807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READNV12                                                               \
600bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"                             \
620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)                                                               \
632c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%1]!                    \n"                             \
644807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
654807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vuzp.u8    d2, d3                         \n"                             \
66793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vtrn.u32   d2, d3                         \n"
674807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
684807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y and 4 VU from NV21
// Asm fragment: same as READNV12 except that the chroma bytes at [%1] are
// ordered V0 U0 V1 U1 ...; the vuzp operands are therefore reversed
// (d3, d2) so that the odd bytes (U) land in d2 and the even bytes (V) in
// d3. After vtrn.u32, d2 = 4 U bytes (low half) followed by 4 V bytes
// (high half). d3 is used as scratch.
694807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READNV21                                                               \
700bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
712c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"                             \
720bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)                                                               \
732c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%1]!                    \n"                             \
744807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vuzp.u8    d3, d2                         \n"                             \
76793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vtrn.u32   d2, d3                         \n"
77793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
78793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com// Read 8 YUY2
// Asm fragment: vld2.8 de-interleaves 16 bytes from [%0] (post-incremented):
// the even bytes (Y) go to d0 and the odd bytes (U0 V0 U1 V1 ...) go to d2.
// The chroma bytes are then split and packed exactly as in READNV12, giving
// d2 = 4 U bytes (low half) followed by 4 V bytes (high half).
// d3 is used as scratch.
79793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com#define READYUY2                                                               \
800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
812c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {d0, d2}, [%0]!                \n"                             \
82793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d3, d2                         \n"                             \
83793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vuzp.u8    d2, d3                         \n"                             \
84793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vtrn.u32   d2, d3                         \n"
85793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
86793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com// Read 8 UYVY
// Asm fragment: vld2.8 de-interleaves 16 bytes from [%0] (post-incremented):
// the even bytes (U0 V0 U1 V1 ...) go to d2 and the odd bytes (Y) go to d3.
// Y is moved to d0 before d3 is reused as scratch, then the chroma bytes are
// split and packed so d2 = 4 U bytes (low half) followed by 4 V bytes
// (high half).
87793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com#define READUYVY                                                               \
880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)                                                               \
892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {d2, d3}, [%0]!                \n"                             \
90793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d0, d3                         \n"                             \
91793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d3, d2                         \n"                             \
92793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vuzp.u8    d2, d3                         \n"                             \
93793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vtrn.u32   d2, d3                         \n"
944807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Asm fragment: converts 8 pixels from YUV to separate B, G and R bytes.
// Inputs:  d0 = 8 Y bytes; d2 = 4 U bytes (low half) and 4 V bytes (high
//          half), as produced by the READ* fragments above.
// Expects: d24 = U/V multipliers for B/R, d25 = U/V multipliers for G,
//          d26 = 128 in every byte, q14 = Y multiplier and q15 = Y offset in
//          every 16-bit lane. The row functions in this file load these
//          before entering their loop.
// Outputs: d20 = 8 B bytes, d21 = 8 G bytes, d22 = 8 R bytes, in pixel order.
// Scratch: d0-d2 and q8-q11 are overwritten.
// Each U/V sample is shared by one even and one odd pixel: Y is split into
// even (d0) and odd (d1) 16-bit lanes, the chroma terms are added with
// saturation, and the sums are narrowed to bytes with a saturating shift
// right by 6 before being re-interleaved into pixel order.
954807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define YUV422TORGB                                                            \
9693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "veor.u8    d2, d26                        \n"/*subtract 128 from u and v*/\
9793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmull.s8   q8, d2, d24                    \n"/*  u/v B/R component      */\
9893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmull.s8   q9, d2, d25                    \n"/*  u/v G component        */\
9993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d1, #0                         \n"/*  split odd/even y apart */\
10093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vtrn.u8    d0, d1                         \n"                             \
10193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vsub.s16   q0, q0, q15                    \n"/*  offset y               */\
10293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmul.s16   q0, q0, q14                    \n"                             \
10393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vadd.s16   d18, d19                       \n"                             \
104c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vqadd.s16  d20, d0, d16                   \n" /* B */                     \
10593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vqadd.s16  d21, d1, d16                   \n"                             \
106c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vqadd.s16  d22, d0, d17                   \n" /* R */                     \
10793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vqadd.s16  d23, d1, d17                   \n"                             \
108c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vqadd.s16  d16, d0, d18                   \n" /* G */                     \
10993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vqadd.s16  d17, d1, d18                   \n"                             \
110d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com    "vqshrun.s16 d0, q10, #6                   \n" /* B */                     \
111d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com    "vqshrun.s16 d1, q11, #6                   \n" /* R */                     \
112d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com    "vqshrun.s16 d2, q8, #6                    \n" /* G */                     \
11393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmovl.u8   q10, d0                        \n"/*  set up for reinterleave*/\
11493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmovl.u8   q11, d1                        \n"                             \
11593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmovl.u8   q8, d2                         \n"                             \
11693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vtrn.u8    d20, d21                       \n"                             \
11793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vtrn.u8    d22, d23                       \n"                             \
11893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vtrn.u8    d16, d17                       \n"                             \
1194807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d21, d16                       \n"
12093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
// Signed 8-bit chroma multipliers consumed by YUV422TORGB. The row functions
// visible here load only the first 8 bytes of each table (into d24 and d25),
// laid out to match d2: entries 0-3 multiply the 4 U samples and entries 4-7
// multiply the 4 V samples.
//   kUVToRB: U * 127 feeds B, V * 102 feeds R.
//   kUVToG:  U * -25 and V * -52 are summed to feed G.
// The sums are later shifted right by 6, so the values are scaled by 64.
// The trailing 8 zero bytes are not read by the functions visible here.
121f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToRB  = { 127, 127, 127, 127, 102, 102, 102, 102,
122f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com                         0, 0, 0, 0, 0, 0, 0, 0 };
123f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
124f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com                       0, 0, 0, 0, 0, 0, 0, 0 };
12593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
// Converts one row of planar YUV 4:4:4 to 32-bit pixels stored in memory as
// B, G, R, 255.
//   src_y, src_u, src_v: source planes; 8 bytes are read from each per
//                        iteration.
//   dst_argb:            destination; 32 bytes are written per iteration.
//   width:               pixel count. The loop subtracts 8 and repeats while
//                        the result is > 0, so a full group of 8 pixels is
//                        always processed; presumably callers pass a positive
//                        multiple of 8 - TODO confirm.
// READYUV444 averages adjacent U and V pairs, so each pair of output pixels
// shares one chroma sample.
126b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I444ToARGBRow_NEON(const uint8* src_y,
127b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_u,
128b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_v,
129b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        uint8* dst_argb,
130b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        int width) {
131b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  asm volatile (
    // Loop-invariant constants expected by YUV422TORGB.
1320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
1332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
1340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
1352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
136b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d26, #128                      \n"
137b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q14, #74                       \n"
138b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q15, #16                       \n"
139c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
    // Main loop: 8 pixels per iteration.
140b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  "1:                                          \n"
141b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    READYUV444
142b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    YUV422TORGB
143b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // d23 becomes the fourth (alpha) byte of every pixel.
144b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d23, #255                      \n"
1450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
146b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
147b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "bgt        1b                             \n"
148b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "+r"(src_y),     // %0
149b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_u),     // %1
150b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_v),     // %2
151b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(dst_argb),  // %3
152b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(width)      // %4
153b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "r"(&kUVToRB),   // %5
154b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "r"(&kUVToG)     // %6
155b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
156b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
157b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  );
158b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com}
159b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
// Converts one row of planar YUV 4:2:2 to 32-bit pixels stored in memory as
// B, G, R, 255.
//   src_y:        luma plane; 8 bytes are read per iteration.
//   src_u, src_v: chroma planes; 4 bytes are read from each per iteration.
//   dst_argb:     destination; 32 bytes are written per iteration.
//   width:        pixel count. The loop subtracts 8 and repeats while the
//                 result is > 0, so a full group of 8 pixels is always
//                 processed; presumably callers pass a positive multiple of
//                 8 - TODO confirm.
1609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToARGBRow_NEON(const uint8* src_y,
1619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
1629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
1639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_argb,
164e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com                        int width) {
1655b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
    // Loop-invariant constants expected by YUV422TORGB.
1660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
1672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
1680bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
1692c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
17093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d26, #128                      \n"
17193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q14, #74                       \n"
17293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q15, #16                       \n"
173c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
    // Main loop: 8 pixels per iteration.
17493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  "1:                                          \n"
1754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
176e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com    YUV422TORGB
1774807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // d23 becomes the fourth (alpha) byte of every pixel.
17893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d23, #255                      \n"
1790bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
180dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
18118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
1829de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
1839de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
1849de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
1859de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_argb),  // %3
1869de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
1879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
1889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
18964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
19064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
19193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  );
19293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com}
19393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
// Converts one row of planar YUV 4:1:1 to 32-bit pixels stored in memory as
// B, G, R, 255.
//   src_y:        luma plane; 8 bytes are read per iteration.
//   src_u, src_v: chroma planes; 2 bytes are read from each per iteration
//                 and each byte is duplicated by READYUV411.
//   dst_argb:     destination; 32 bytes are written per iteration.
//   width:        pixel count. The loop subtracts 8 and repeats while the
//                 result is > 0, so a full group of 8 pixels is always
//                 processed; presumably callers pass a positive multiple of
//                 8 - TODO confirm.
194b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I411ToARGBRow_NEON(const uint8* src_y,
195b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_u,
196b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_v,
197b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        uint8* dst_argb,
198b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        int width) {
199b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  asm volatile (
    // Loop-invariant constants expected by YUV422TORGB.
2000bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
2012c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
2020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
2032c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
204b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d26, #128                      \n"
205b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q14, #74                       \n"
206b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q15, #16                       \n"
207c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
    // Main loop: 8 pixels per iteration.
208b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  "1:                                          \n"
209b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    READYUV411
210b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    YUV422TORGB
211b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // d23 becomes the fourth (alpha) byte of every pixel.
212b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d23, #255                      \n"
2130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
214b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
215b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "bgt        1b                             \n"
216b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "+r"(src_y),     // %0
217b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_u),     // %1
218b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_v),     // %2
219b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(dst_argb),  // %3
220b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(width)      // %4
221b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "r"(&kUVToRB),   // %5
222b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "r"(&kUVToG)     // %6
223b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
224b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
225b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  );
226b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com}
227b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
// Converts one row of planar YUV 4:2:2 to 32-bit pixels stored in memory as
// 255, R, G, B.
//   src_y:        luma plane; 8 bytes are read per iteration.
//   src_u, src_v: chroma planes; 4 bytes are read from each per iteration.
//   dst_bgra:     destination; 32 bytes are written per iteration.
//   width:        pixel count. The loop subtracts 8 and repeats while the
//                 result is > 0, so a full group of 8 pixels is always
//                 processed; presumably callers pass a positive multiple of
//                 8 - TODO confirm.
2289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToBGRARow_NEON(const uint8* src_y,
2299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
2309de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
2319de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_bgra,
232e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com                        int width) {
2335b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
    // Loop-invariant constants expected by YUV422TORGB.
2340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
2352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
2360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
2372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
23893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d26, #128                      \n"
23993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q14, #74                       \n"
24093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q15, #16                       \n"
241c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
    // Main loop: 8 pixels per iteration.
24293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  "1:                                          \n"
2434807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
244e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com    YUV422TORGB
2454807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // Swap B (d20) and R (d22) and put 255 in d19 so the store below writes
    // 255, R, G, B for each pixel.
24693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vswp.u8    d20, d22                       \n"
24793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d19, #255                      \n"
2480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
249dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
25018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
2519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
2529de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
2539de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
2549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_bgra),  // %3
2559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
2569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
2579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
25864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
25964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
26093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  );
26193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com}
26293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
// Converts one row of planar YUV 4:2:2 to 32-bit pixels stored in memory as
// R, G, B, 255.
//   src_y:        luma plane; 8 bytes are read per iteration.
//   src_u, src_v: chroma planes; 4 bytes are read from each per iteration.
//   dst_abgr:     destination; 32 bytes are written per iteration.
//   width:        pixel count. The loop subtracts 8 and repeats while the
//                 result is > 0, so a full group of 8 pixels is always
//                 processed; presumably callers pass a positive multiple of
//                 8 - TODO confirm.
2639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToABGRRow_NEON(const uint8* src_y,
2649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
2659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
2669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_abgr,
267e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com                        int width) {
2685b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
    // Loop-invariant constants expected by YUV422TORGB.
2690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
2702c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
2710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
2722c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
27393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d26, #128                      \n"
27493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q14, #74                       \n"
27593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q15, #16                       \n"
276c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
    // Main loop: 8 pixels per iteration.
27793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  "1:                                          \n"
2784807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
279e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com    YUV422TORGB
2804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // Swap B (d20) and R (d22) and put 255 in d23 so the store below writes
    // R, G, B, 255 for each pixel.
28193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vswp.u8    d20, d22                       \n"
28293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d23, #255                      \n"
2830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
284dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
28518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
2869de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
2879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
2889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
2899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_abgr),  // %3
2909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
2919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
2929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
29364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
29464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
29593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  );
29693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com}
297fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com
// Converts one row of I422 (separate Y, U and V planes) into 4-byte pixels,
// 8 pixels per loop pass.
//   src_y, src_u, src_v: input planes; READYUV422 reads 8 Y, 4 U and 4 V
//                        per pass.
//   dst_rgba: output; each pass stores 32 bytes and advances the pointer.
//   width:    pixel count. It is reduced by 8 per pass and the loop repeats
//             while the remainder is positive, so the last pass always
//             writes a full 8 pixels.
// Byte order per pixel in memory is d19, d20, d21, d22: the constant 255
// first, then the three colour bytes left in d20..d22 by YUV422TORGB
// (presumably B, G, R, matching the labels used by ARGBTORGB565 in this file).
2989de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGBARow_NEON(const uint8* src_y,
2999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
3009de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
3019de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_rgba,
3022d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com                        int width) {
3032d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
3040bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
3052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
3060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
3072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
3082d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u8    d26, #128                      \n"
3092d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u16   q14, #74                       \n"
3102d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u16   q15, #16                       \n"
311c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
3122d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com  "1:                                          \n"
3134807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
3142d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    YUV422TORGB
    // The flags set by subs are consumed by the bgt at the end of the loop.
3154807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // d19 supplies the constant 255 byte that leads every output pixel.
3162d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u8    d19, #255                      \n"
3170bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
    // Interleaved store of 8 x 4 bytes with pointer write-back.
318dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
3192d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "bgt        1b                             \n"
3209de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
3219de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
3229de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
3239de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_rgba),  // %3
3249de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
3259de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
3269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
3292d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com  );
3302d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com}
3312d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com
// Converts one row of I422 (separate Y, U and V planes) into 3-byte pixels,
// 8 pixels per loop pass.
//   src_y, src_u, src_v: input planes; READYUV422 reads 8 Y, 4 U and 4 V
//                        per pass.
//   dst_rgb24: output; each pass stores 24 bytes and advances the pointer.
//   width:     pixel count. It is reduced by 8 per pass and the loop repeats
//              while the remainder is positive, so the last pass always
//              writes a full 8 pixels.
// Byte order per pixel in memory is d20, d21, d22, i.e. the registers left
// by YUV422TORGB stored unmodified (presumably B, G, R, matching the labels
// used by ARGBTORGB565 in this file).
3329de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGB24Row_NEON(const uint8* src_y,
333834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         const uint8* src_u,
334834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         const uint8* src_v,
335834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         uint8* dst_rgb24,
336834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         int width) {
33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
3380bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
3392c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
3400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
3412c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u8    d26, #128                      \n"
34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q14, #74                       \n"
34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q15, #16                       \n"
345c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  "1:                                          \n"
34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    READYUV422
34864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    YUV422TORGB
    // The flags set by subs are consumed by the bgt at the end of the loop.
34964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "subs       %4, %4, #8                     \n"
3500bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
    // Interleaved store of 8 x 3 bytes with pointer write-back.
35164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vst3.8     {d20, d21, d22}, [%3]!         \n"
35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
3539de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),      // %0
3549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),      // %1
3559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),      // %2
3569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_rgb24),  // %3
3579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)       // %4
3589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),    // %5
3599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)      // %6
36064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
36164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
36264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
36364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
36464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
// Converts one row of I422 (separate Y, U and V planes) into 3-byte pixels,
// 8 pixels per loop pass. Identical to I422ToRGB24Row_NEON except that d20
// and d22 are swapped before the store, so the first and third byte of each
// pixel are exchanged.
//   src_y, src_u, src_v: input planes; READYUV422 reads 8 Y, 4 U and 4 V
//                        per pass.
//   dst_raw: output; each pass stores 24 bytes and advances the pointer.
//   width:   pixel count. It is reduced by 8 per pass and the loop repeats
//            while the remainder is positive, so the last pass always
//            writes a full 8 pixels.
3659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRAWRow_NEON(const uint8* src_y,
3669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                       const uint8* src_u,
3679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                       const uint8* src_v,
3689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                       uint8* dst_raw,
36964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                       int width) {
37064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
3710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
3722c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
3730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
3742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
37564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u8    d26, #128                      \n"
37664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q14, #74                       \n"
37764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q15, #16                       \n"
378c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
37964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  "1:                                          \n"
38064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    READYUV422
38164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    YUV422TORGB
    // The flags set by subs are consumed by the bgt at the end of the loop.
38264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // Exchange the first and third colour registers to reverse channel order.
38364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vswp.u8    d20, d22                       \n"
3840bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
    // Interleaved store of 8 x 3 bytes with pointer write-back.
38564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vst3.8     {d20, d21, d22}, [%3]!         \n"
38664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
3879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),    // %0
3889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),    // %1
3899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),    // %2
3909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_raw),  // %3
39164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "+r"(width)     // %4
39264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "r"(&kUVToRB),  // %5
39364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "r"(&kUVToG)    // %6
39464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
39564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
39664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
39764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
39864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
// Asm fragment: packs 8 pixels held as separate byte planes (d20 = B,
// d21 = G, d22 = R) into eight 16-bit values in q0.
// Each result is (B >> 3) | ((G >> 2) << 5) | ((R >> 3) << 11).
// Overwrites d20..d22 and q8, q9, q10 as scratch; q0 is the only output.
39911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTORGB565                                                           \
40011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d20, d20, #3                   \n"  /* B                    */ \
40111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d21, d21, #2                   \n"  /* G                    */ \
40211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
40311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
40411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
40511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
40611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
40711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshl.u16   q10, q10, #11                  \n"  /* R                    */ \
40811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
40911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       q0, q0, q10                    \n"  /* BGR                  */
41011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
// Converts one row of I422 (separate Y, U and V planes) into 16-bit RGB565
// pixels, 8 pixels per loop pass.
//   src_y, src_u, src_v: input planes; READYUV422 reads 8 Y, 4 U and 4 V
//                        per pass.
//   dst_rgb565: output; each pass stores 16 bytes (q0) and advances the
//               pointer.
//   width:      pixel count. It is reduced by 8 per pass and the loop
//               repeats while the remainder is positive, so the last pass
//               always writes a full 8 pixels.
41115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.comvoid I422ToRGB565Row_NEON(const uint8* src_y,
412834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          const uint8* src_u,
413834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          const uint8* src_v,
414834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          uint8* dst_rgb565,
415834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          int width) {
41615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
4170bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
4182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
4190bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
4202c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
42115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vmov.u8    d26, #128                      \n"
42215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vmov.u16   q14, #74                       \n"
42315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vmov.u16   q15, #16                       \n"
424c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
42515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com  "1:                                          \n"
42615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    READYUV422
42715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    YUV422TORGB
    // The flags set by subs must survive until bgt; the NEON instructions in
    // ARGBTORGB565 sit between the two.
42815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "subs       %4, %4, #8                     \n"
    // Pack d20/d21/d22 into eight 16-bit pixels in q0.
42911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTORGB565
4300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
43115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels RGB565.
43215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "bgt        1b                             \n"
43315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    : "+r"(src_y),    // %0
43415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(src_u),    // %1
43515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(src_v),    // %2
43615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(dst_rgb565),  // %3
43715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(width)     // %4
43815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    : "r"(&kUVToRB),  // %5
43915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "r"(&kUVToG)    // %6
44015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
44115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
44215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com  );
44315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com}
44415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com
// Asm fragment: packs 8 pixels held as separate byte planes (d20 = B,
// d21 = G, d22 = R, d23 = A) into eight 16-bit values in q0.
// Each result is (B >> 3) | ((G >> 3) << 5) | ((R >> 3) << 10) | ((A >> 7) << 15).
// The first shift operates on q10, i.e. on d20 and d21 together, so B and G
// are both reduced to 5 bits by that single instruction.
// Overwrites d20..d23 and q1, q8, q9, q10, q11 as scratch; q0 is the output.
44511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTOARGB1555                                                         \
44611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    q10, q10, #3                   \n"  /* BG                   */ \
44711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
44811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d23, d23, #7                   \n"  /* A                    */ \
44911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
45011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
45111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
45211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmovl.u8   q11, d23                       \n"  /* A                    */ \
45311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
45411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshl.u16   q10, q10, #10                  \n"  /* R                    */ \
45511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshl.u16   q11, q11, #15                  \n"  /* A                    */ \
45611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
45711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       q1, q10, q11                   \n"  /* RA                   */ \
45811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       q0, q0, q1                     \n"  /* BGRA                 */
45911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
// Converts one row of I422 (separate Y, U and V planes) into 16-bit ARGB1555
// pixels, 8 pixels per loop pass. The alpha plane is forced to 255 before
// packing, so the top bit of every output pixel is set.
//   src_y, src_u, src_v: input planes; READYUV422 reads 8 Y, 4 U and 4 V
//                        per pass.
//   dst_argb1555: output; each pass stores 16 bytes (q0) and advances the
//                 pointer.
//   width:        pixel count. It is reduced by 8 per pass and the loop
//                 repeats while the remainder is positive, so the last pass
//                 always writes a full 8 pixels.
46011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB1555Row_NEON(const uint8* src_y,
461834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_u,
462834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_v,
463834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            uint8* dst_argb1555,
464834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            int width) {
46511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
4660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
4672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
4680bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
4692c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
47011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d26, #128                      \n"
47111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q14, #74                       \n"
47211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q15, #16                       \n"
473c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
47411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  "1:                                          \n"
47511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    READYUV422
47611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    YUV422TORGB
    // The flags set by subs must survive until bgt; only NEON instructions
    // sit between the two.
47711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "subs       %4, %4, #8                     \n"
    // Alpha plane: reloaded every pass because ARGBTOARGB1555 shifts d23 and
    // overwrites q11 (d22/d23).
47811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d23, #255                      \n"
47911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB1555
4800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
48111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB1555.
48211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "bgt        1b                             \n"
48311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "+r"(src_y),    // %0
48411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_u),    // %1
48511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_v),    // %2
48611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(dst_argb1555),  // %3
48711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(width)     // %4
48811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "r"(&kUVToRB),  // %5
48911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "r"(&kUVToG)    // %6
49011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
49111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
49211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  );
49311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com}
49411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
// Asm fragment: packs 8 pixels held as separate byte planes (d20 = B,
// d21 = G, d22 = R, d23 = A) into 8 two-byte pixels in q0 (d0/d1).
// B and R are shifted down into the low nibble; G and A have the bits that
// are set in d4 cleared. d0 = B|G and d1 = R|A are then byte-interleaved, so
// each pixel is stored as the byte (G|B) followed by the byte (A|R).
// Precondition: d4 must already hold the bit mask to clear. This macro never
// writes d4; the caller in this file loads 0x0f into every byte of it.
// Overwrites d20..d23; q0 is the output.
49511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTOARGB4444                                                         \
49611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d20, d20, #4                   \n"  /* B                    */ \
49711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vbic.32    d21, d21, d4                   \n"  /* G                    */ \
49811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vshr.u8    d22, d22, #4                   \n"  /* R                    */ \
49911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vbic.32    d23, d23, d4                   \n"  /* A                    */ \
50011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       d0, d20, d21                   \n"  /* BG                   */ \
50111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vorr       d1, d22, d23                   \n"  /* RA                   */ \
50211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vzip.u8    d0, d1                         \n"  /* BGRA                 */
50311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
// Converts one row of I422 (separate Y, U and V planes) into 16-bit ARGB4444
// pixels, 8 pixels per loop pass. The alpha plane is forced to 255 before
// packing.
//   src_y, src_u, src_v: input planes; READYUV422 reads 8 Y, 4 U and 4 V
//                        per pass.
//   dst_argb4444: output; each pass stores 16 bytes (q0) and advances the
//                 pointer.
//   width:        pixel count. It is reduced by 8 per pass and the loop
//                 repeats while the remainder is positive, so the last pass
//                 always writes a full 8 pixels.
// NOTE(review): d4 is initialised once outside the loop, so this relies on
// READYUV422 / YUV422TORGB not touching d4; their bodies are outside this
// chunk, so confirm before editing either macro.
50411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB4444Row_NEON(const uint8* src_y,
505834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_u,
506834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_v,
507834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            uint8* dst_argb4444,
508834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            int width) {
50911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
5100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
5112c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
5120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(6)
5132c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
51411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d26, #128                      \n"
51511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q14, #74                       \n"
51611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q15, #16                       \n"
51711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
518c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
51911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  "1:                                          \n"
52011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    READYUV422
52111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    YUV422TORGB
    // The flags set by subs must survive until bgt; only NEON instructions
    // sit between the two.
52211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "subs       %4, %4, #8                     \n"
    // Alpha plane: reloaded every pass because ARGBTOARGB4444 modifies d23.
52311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d23, #255                      \n"
52411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB4444
5250bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
52611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB4444.
52711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "bgt        1b                             \n"
52811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "+r"(src_y),    // %0
52911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_u),    // %1
53011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_v),    // %2
53111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(dst_argb4444),  // %3
53211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(width)     // %4
53311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "r"(&kUVToRB),  // %5
53411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "r"(&kUVToG)    // %6
53511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
53611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
53711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  );
53811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com}
53911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
// Converts one row of Y-only samples into 4-byte pixels, 8 pixels per loop
// pass, by running the same YUV422TORGB conversion used by the I422 routines
// on data fetched by READYUV400 (defined outside this chunk; presumably it
// reads 8 Y bytes and supplies neutral chroma).
//   src_y:    input luma row.
//   dst_argb: output; each pass stores 32 bytes and advances the pointer.
//             Byte order per pixel is d20, d21, d22, d23, with d23 = 255.
//   width:    pixel count. It is reduced by 8 per pass and the loop repeats
//             while the remainder is positive, so the last pass always
//             writes a full 8 pixels.
// Each memory instruction is preceded by exactly one MEMACCESS marker, as in
// the other row functions; a redundant second MEMACCESS(3) before the first
// table load has been removed.
54000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid YToARGBRow_NEON(const uint8* src_y,
54100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                     uint8* dst_argb,
54200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                     int width) {
54300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
5440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
5462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%3]                    \n"
5470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
5482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%4]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
54900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d26, #128                      \n"
55000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u16   q14, #74                       \n"
55100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u16   q15, #16                       \n"
552c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
55300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  "1:                                          \n"
55400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    READYUV400
55500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    YUV422TORGB
    // The flags set by subs are consumed by the bgt at the end of the loop.
55600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "subs       %2, %2, #8                     \n"
    // d23 supplies the constant 255 byte that ends every output pixel.
55700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d23, #255                      \n"
5580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
    // Interleaved store of 8 x 4 bytes with pointer write-back.
55900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
56000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "bgt        1b                             \n"
56100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "+r"(src_y),     // %0
56200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(dst_argb),  // %1
56300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(width)      // %2
56400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "r"(&kUVToRB),   // %3
56500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "r"(&kUVToG)     // %4
56600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
56700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
56800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  );
56900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com}
57000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com
// Expands one row of single-byte samples into 4-byte pixels, 8 pixels per
// loop pass, with no arithmetic on the sample: each input byte is copied
// into the first three bytes of its pixel and the fourth byte is 255.
//   src_y:    input row; 8 bytes are read per pass and the pointer advances.
//   dst_argb: output; each pass stores 32 bytes and advances the pointer.
//   width:    pixel count. It is reduced by 8 per pass and the loop repeats
//             while the remainder is positive, so the last pass always
//             reads and writes a full 8 pixels.
// NOTE(review): unlike the sibling routines, the .p2align directive here
// precedes the vmov rather than sitting directly before the loop label.
57100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid I400ToARGBRow_NEON(const uint8* src_y,
57200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                        uint8* dst_argb,
57300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                        int width) {
57400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  asm volatile (
575c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
    // Fourth output byte; set once because nothing in the loop writes d23.
57600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d23, #255                      \n"
57700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  "1:                                          \n"
5780bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
5792c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d20}, [%0]!                   \n"
    // Replicate the loaded bytes into the second and third channel registers.
58000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov       d21, d20                       \n"
58100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov       d22, d20                       \n"
58200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "subs       %2, %2, #8                     \n"
5830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
    // Interleaved store of 8 x 4 bytes with pointer write-back.
58400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
58500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "bgt        1b                             \n"
58600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "+r"(src_y),     // %0
58700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(dst_argb),  // %1
58800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(width)      // %2
58900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    :
59000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "cc", "memory", "d20", "d21", "d22", "d23"
59100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  );
59200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com}
59300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com
// Converts one row of NV12 (a Y plane plus a single combined UV plane) into
// 4-byte pixels, 8 pixels per loop pass. Samples are fetched by READNV12
// (defined outside this chunk) and converted by YUV422TORGB.
//   src_y:    input luma row.
//   src_uv:   input chroma row (presumably interleaved U,V pairs — confirm
//             against READNV12).
//   dst_argb: output; each pass stores 32 bytes and advances the pointer.
//             Byte order per pixel is d20, d21, d22, d23, with d23 = 255.
//   width:    pixel count. It is reduced by 8 per pass and the loop repeats
//             while the remainder is positive, so the last pass always
//             writes a full 8 pixels.
5949de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV12ToARGBRow_NEON(const uint8* src_y,
5959de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_uv,
5969de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_argb,
5974807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com                        int width) {
5984807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  asm volatile (
    // Load 8 bytes from each coefficient table into d24 / d25.
5990bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
6002c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
6010bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
6022c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
    // Loop-invariant constants, presumably consumed by YUV422TORGB (defined
    // outside this chunk).
6034807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d26, #128                      \n"
6044807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q14, #74                       \n"
6054807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q15, #16                       \n"
606c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
6074807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  "1:                                          \n"
6084807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READNV12
6094807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    YUV422TORGB
    // The flags set by subs are consumed by the bgt at the end of the loop.
6104807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #8                     \n"
    // d23 supplies the constant 255 byte that ends every output pixel.
6114807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d23, #255                      \n"
6120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
    // Interleaved store of 8 x 4 bytes with pointer write-back.
6134807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
6144807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "bgt        1b                             \n"
6159de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
6169de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_uv),    // %1
6179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_argb),  // %2
6189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %3
6199de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %4
6209de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %5
62164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
62264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
6234807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  );
6244807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
6254807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
6269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV21ToARGBRow_NEON(const uint8* src_y,
6279de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_uv,
6289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_argb,
6294807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com                        int width) {
6304807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  asm volatile (
6310bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
6322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
6330bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
6342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
6354807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d26, #128                      \n"
6364807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q14, #74                       \n"
6374807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q15, #16                       \n"
638c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
6394807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  "1:                                          \n"
6404807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READNV21
6414807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    YUV422TORGB
6424807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #8                     \n"
6434807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d23, #255                      \n"
6440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
6454807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
6464807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "bgt        1b                             \n"
6479de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
6489de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_uv),    // %1
6499de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_argb),  // %2
6509de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %3
6519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %4
6529de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %5
65364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
65464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
6554807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  );
6564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
6574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
6589f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV12ToRGB565Row_NEON(const uint8* src_y,
659bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          const uint8* src_uv,
660bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          uint8* dst_rgb565,
661bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          int width) {
6629f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  asm volatile (
6630bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
6642c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
6650bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
6662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
6679f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u8    d26, #128                      \n"
6689f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q14, #74                       \n"
6699f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q15, #16                       \n"
670c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
6719f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  "1:                                          \n"
6729f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    READNV12
6739f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    YUV422TORGB
6749f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "subs       %3, %3, #8                     \n"
6759f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    ARGBTORGB565
6760bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
6779f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
6789f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "bgt        1b                             \n"
6799f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "+r"(src_y),     // %0
6809f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(src_uv),    // %1
6819f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(dst_rgb565),  // %2
6829f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(width)      // %3
6839f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "r"(&kUVToRB),   // %4
6849f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "r"(&kUVToG)     // %5
6859f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
6869f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
6879f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  );
6889f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com}
6899f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com
6909f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV21ToRGB565Row_NEON(const uint8* src_y,
691bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          const uint8* src_uv,
692bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          uint8* dst_rgb565,
693bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          int width) {
6949f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  asm volatile (
6950bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
6962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
6970bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(5)
6982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
6999f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u8    d26, #128                      \n"
7009f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q14, #74                       \n"
7019f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q15, #16                       \n"
702c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
7039f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  "1:                                          \n"
7049f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    READNV21
7059f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    YUV422TORGB
7069f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "subs       %3, %3, #8                     \n"
7079f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    ARGBTORGB565
7080bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
7099f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
7109f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "bgt        1b                             \n"
7119f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "+r"(src_y),     // %0
7129f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(src_uv),    // %1
7139f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(dst_rgb565),  // %2
7149f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(width)      // %3
7159f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "r"(&kUVToRB),   // %4
7169f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "r"(&kUVToG)     // %5
7179f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
7189f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
7199f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  );
7209f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com}
7219f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com
722793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid YUY2ToARGBRow_NEON(const uint8* src_yuy2,
723793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        uint8* dst_argb,
724793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        int width) {
725793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  asm volatile (
7260bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
7272c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%3]                    \n"
7280bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
7292c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%4]                    \n"
730793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d26, #128                      \n"
731793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q14, #74                       \n"
732793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q15, #16                       \n"
733c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
734793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  "1:                                          \n"
735793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    READYUY2
736793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    YUV422TORGB
737793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "subs       %2, %2, #8                     \n"
738793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d23, #255                      \n"
7390bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
740793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
741793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "bgt        1b                             \n"
742793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "+r"(src_yuy2),  // %0
743793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(dst_argb),  // %1
744793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(width)      // %2
745793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "r"(&kUVToRB),   // %3
746793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "r"(&kUVToG)     // %4
747793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
748793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
749793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  );
750793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com}
751793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
752793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid UYVYToARGBRow_NEON(const uint8* src_uyvy,
753793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        uint8* dst_argb,
754793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        int width) {
755793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  asm volatile (
7560bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
7572c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%3]                    \n"
7580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(4)
7592c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%4]                    \n"
760793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d26, #128                      \n"
761793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q14, #74                       \n"
762793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q15, #16                       \n"
763c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
764793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  "1:                                          \n"
765793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    READUYVY
766793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    YUV422TORGB
767793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "subs       %2, %2, #8                     \n"
768793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d23, #255                      \n"
7690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
770793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
771793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "bgt        1b                             \n"
772793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "+r"(src_uyvy),  // %0
773793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(dst_argb),  // %1
774793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(width)      // %2
775793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "r"(&kUVToRB),   // %3
776793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "r"(&kUVToG)     // %4
777793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
778793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
779793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  );
780793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com}
781793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
7824a86a836fcde981b6c3fd3f4a216a3253a2d26bcfbarchard@google.com// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
783f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
784f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com                     int width) {
7855b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
786c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
7872d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com  "1:                                          \n"
7880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
7892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pairs of UV
7904807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
7910bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
7922c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store U
7930bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
7942c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%2]!                    \n"  // store V
79518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
7968536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com    : "+r"(src_uv),  // %0
7978536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com      "+r"(dst_u),   // %1
7988536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com      "+r"(dst_v),   // %2
79916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com      "+r"(width)    // %3  // Output registers
8002d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com    :                       // Input registers
8018f506332af217882648eed166a257557855b9fdbfbarchard@google.com    : "cc", "memory", "q0", "q1"  // Clobber List
8022d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com  );
8032d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com}
8042d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com
80562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com// Reads 16 U's and V's and writes out 16 pairs of UV.
806f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
807f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com                     int width) {
80862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com  asm volatile (
809c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
81062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com  "1:                                          \n"
8110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
8122c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load U
8130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
8142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // load V
81562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
8160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
817d26b4514726a9a7476f6dfb6730cda2b422bf550fbarchard@google.com    "vst2.u8    {q0, q1}, [%2]!                \n"  // store 16 pairs of UV
81862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    "bgt        1b                             \n"
81962a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    :
82062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(src_u),   // %0
82162a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(src_v),   // %1
82262a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(dst_uv),  // %2
82362a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(width)    // %3  // Output registers
82462a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    :                       // Input registers
8258f506332af217882648eed166a257557855b9fdbfbarchard@google.com    : "cc", "memory", "q0", "q1"  // Clobber List
82662a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com  );
82762a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com}
828834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com
8292c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com// Copy multiple of 32.  vld4.8  allow unaligned and is fastest on a15.
83019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.comvoid CopyRow_NEON(const uint8* src, uint8* dst, int count) {
8315b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
832c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
83319932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com  "1:                                          \n"
8340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
8352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 32
83662a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    "subs       %2, %2, #32                    \n"  // 32 processed per loop
8370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
8382c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 32
83918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
8403e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src),   // %0
8413e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst),   // %1
8423e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(count)  // %2  // Output registers
8433e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :                     // Input registers
8448f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
84519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com  );
84619932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com}
84719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com
84864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow8 writes 'count' bytes using a 32 bit value repeated.
849f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SetRow_NEON(uint8* dst, uint32 v32, int count) {
8503e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  asm volatile (
8514807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vdup.u32  q0, %2                          \n"  // duplicate 4 ints
8524807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "1:                                        \n"
8534807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs      %1, %1, #16                     \n"  // 16 bytes per loop
8540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
8552c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8    {q0}, [%0]!                     \n"  // store
8564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "bgt       1b                              \n"
8573e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(dst),   // %0
8583e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(count)  // %1
8593e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "r"(v32)     // %2
8608f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0"
8613e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
8624807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
8634807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
8644807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// TODO(fbarchard): Make fully assembler
86564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow32 writes 'count' words using a 32 bit value repeated.
866f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
8671b40d8caa2811759aa5fa87f2e23061d26f8968cfbarchard@google.com                      int dst_stride, int height) {
8684807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  for (int y = 0; y < height; ++y) {
869f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com    SetRow_NEON(dst, v32, width << 2);
8704807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    dst += dst_stride;
8714807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  }
8724807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
8734807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
87416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.comvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
8755b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
8763e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    // Start at end of source row.
8773e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "mov        r3, #-16                       \n"
8783e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "add        %0, %0, %2                     \n"
8793e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "sub        %0, #16                        \n"
8803e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
881c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
88282069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com  "1:                                          \n"
8830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
8843e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
8853e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "subs       %2, #16                        \n"  // 16 pixels per loop.
8863e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vrev64.8   q0, q0                         \n"
8870bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
8883e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
8890bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
8903e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"
8913e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "bgt        1b                             \n"
8923e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src),   // %0
8933e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst),   // %1
8943e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)  // %2
8953e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
8968f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "r3", "q0"
89716a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com  );
89816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com}
89916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com
900752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.comvoid MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
901752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com                      int width) {
9025b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
9033e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    // Start at end of source row.
904752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com    "mov        r12, #-16                      \n"
9053e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "add        %0, %0, %3, lsl #1             \n"
9063e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "sub        %0, #16                        \n"
9073e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
908c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
90982069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com  "1:                                          \n"
9100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
911752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com    "vld2.8     {d0, d1}, [%0], r12            \n"  // src -= 16
9123e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "subs       %3, #8                         \n"  // 8 pixels per loop.
9133e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vrev64.8   q0, q0                         \n"
9140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
9153e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // dst += 8
9160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
9173e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"
9183e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "bgt        1b                             \n"
9193e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_uv),  // %0
9203e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_u),   // %1
9213e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_v),   // %2
9223e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)    // %3
9233e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
9248f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "r12", "q0"
92516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com  );
92616a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com}
9273e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
// Mirrors a row of 4-byte pixels: the pixel order is reversed while the byte
// order inside each pixel is kept.
//   src   - start of the source row; the asm moves it to the last 16 bytes
//           (src + width * 4 - 16) and then walks backwards.
//   dst   - destination row, written front to back.
//   width - pixel count; 4 pixels (16 bytes) are handled per iteration.
// The counter is only tested after an iteration (subs ... bgt), so the body
// always runs at least once and the remainder is not handled here.
// NOTE(review): presumably callers pass a positive multiple of 4 - confirm.
9283e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.comvoid ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
9293e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  asm volatile (
9303e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    // Start at end of source row.
    // r3 holds the -16 byte post-increment used by the load below.
9313e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "mov        r3, #-16                       \n"
9323e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "add        %0, %0, %2, lsl #2             \n"
9333e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "sub        %0, #16                        \n"
9343e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
935c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
9363e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  "1:                                          \n"
9370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
9383e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
9393e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "subs       %2, #4                         \n"  // 4 pixels per loop.
    // vrev64.32 swaps the two 32-bit pixels inside each 64-bit half; storing
    // d1 before d0 then swaps the halves, giving a full 4-pixel reversal.
9403e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vrev64.32  q0, q0                         \n"
9410bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
9423e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
9430bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
9443e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"
    // Branches on the flags produced by the subs above.
9453e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "bgt        1b                             \n"
9463e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src),   // %0
9473e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst),   // %1
9483e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)  // %2
9493e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
9508f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "r3", "q0"
9513e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
9523e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com}
95316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com
// Expands a row of 3-byte RGB24 pixels to 4-byte ARGB pixels.
//   src_rgb24 - source row, 3 bytes per pixel (24 bytes read per iteration).
//   dst_argb  - destination row, 4 bytes per pixel (32 bytes written per
//               iteration).
//   pix       - pixel count; 8 pixels are handled per iteration.
// The three source channels are copied in their original order and a constant
// 255 is appended as the fourth byte of every pixel.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
954797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
955797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  asm volatile (
    // d4 is set once outside the loop; nothing inside the loop writes it.
956275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vmov.u8    d4, #255                       \n"  // Alpha
957c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
958797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  "1:                                          \n"
9590bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld3 de-interleaves: byte 0 of each pixel -> d1, byte 1 -> d2, byte 2 -> d3.
960275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RGB24.
96182069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
9620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
    // vst4 re-interleaves the three channels with the constant alpha in d4.
9634807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
964797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "bgt        1b                             \n"
965797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  : "+r"(src_rgb24),  // %0
966dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_argb),   // %1
967dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)         // %2
968797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  :
9698f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
970797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  );
971797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com}
972797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com
// Expands a row of 3-byte RAW pixels to 4-byte ARGB pixels.
//   src_raw  - source row, 3 bytes per pixel (24 bytes read per iteration).
//   dst_argb - destination row, 4 bytes per pixel (32 bytes written per
//              iteration).
//   pix      - pixel count; 8 pixels are handled per iteration.
// Same as the RGB24 conversion except that the first and third source bytes
// of every pixel are exchanged before storing; a constant 255 is appended as
// the fourth byte.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
973797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
974797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  asm volatile (
    // d4 is set once outside the loop; nothing inside the loop writes it.
975275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vmov.u8    d4, #255                       \n"  // Alpha
976c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
977797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  "1:                                          \n"
9780bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
979275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RAW.
9804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
981275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vswp.u8    d1, d3                         \n"  // swap R, B
9820bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
983275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
984797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "bgt        1b                             \n"
985dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_raw),   // %0
986797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "+r"(dst_argb),  // %1
987797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "+r"(pix)        // %2
988797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  :
9898f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
990797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  );
991797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com}
992797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com
// Asm fragment: expands 8 RGB565 pixels to 8-bit-per-channel planes.
//   In:      q0 = 8 pixels, 16 bits each.
//   Out:     d0 = B, d1 = G, d2 = R (one byte per pixel).
//   Scratch: d4, d5, d6.  d3 is not touched, so a caller-supplied alpha
//            value in d3 survives the macro.
// Each 5- or 6-bit field is shifted to the top of its byte and its own high
// bits are OR-ed into the vacated low bits.
993bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com#define RGB565TOARGB                                                           \
994f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshrn.u16  d6, q0, #5                     \n"  /* G xxGGGGGG           */ \
995f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB RRRRRxxx */ \
996f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshl.u8    d6, d6, #2                     \n"  /* G GGGGGG00 upper 6   */ \
997f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshr.u8    d1, d1, #3                     \n"  /* R 000RRRRR lower 5   */ \
998f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
999f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
1000f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
1001f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshr.u8    d4, d6, #6                     \n"  /* G 000000GG lower 2   */ \
1002f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
1003f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
1004bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 16-bit RGB565 pixels to 4-byte ARGB pixels.
//   src_rgb565 - source row, 2 bytes per pixel (16 bytes read per iteration).
//   dst_argb   - destination row, 4 bytes per pixel (32 bytes written per
//                iteration).
//   pix        - pixel count; 8 pixels are handled per iteration.
// The RGB565TOARGB fragment produces B, G, R in d0, d1, d2; d3 carries the
// constant 255 stored as the fourth byte of every pixel.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
1005bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
1006bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
    // Set once; RGB565TOARGB does not write d3.
1007bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d3, #255                       \n"  // Alpha
1008c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1009bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
10100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
10114b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
1012bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1013bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    RGB565TOARGB
10140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1015bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
    // Branches on the flags produced by the subs above.
1016bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
1017bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgb565),  // %0
1018bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_argb),    // %1
1019bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)          // %2
1020bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
10218f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
1022bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
1023bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
1024bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Asm fragment: expands 8 ARGB1555 pixels to 8-bit-per-channel planes.
//   In:      q0 = 8 pixels, 16 bits each.
//   Out:     d0 = B, d1 = G, d2 = R, d3 = A (one byte per pixel).
//   Scratch: q2, q3 (d4-d7).
// The 1-bit alpha is isolated as 0/1 and negated, giving 0x00 or 0xFF.  Each
// 5-bit color field is shifted to the top of its byte and its own high bits
// are OR-ed into the vacated low bits.
// Unlike RGB565TOARGB, this fragment writes d3.
// NOTE(review): the last line still ends in a continuation backslash, so the
// macro definition extends onto the line that follows it.
10254b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB1555TOARGB                                                         \
10264b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshrn.u16  d7, q0, #8                     \n"  /* A Arrrrrxx           */ \
10274b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    d6, d7, #2                     \n"  /* R xxxRRRRR           */ \
10284b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshrn.u16  d5, q0, #5                     \n"  /* G xxxGGGGG           */ \
10294b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vmovn.u16  d4, q0                         \n"  /* B xxxBBBBB           */ \
10304b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    d7, d7, #7                     \n"  /* A 0000000A           */ \
10314b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vneg.s8    d7, d7                         \n"  /* A AAAAAAAA upper 8   */ \
10324b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    d6, d6, #3                     \n"  /* R RRRRR000 upper 5   */ \
10334b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q1, q3, #5                     \n"  /* R,A 00000RRR lower 3 */ \
10344b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    q0, q2, #3                     \n"  /* B,G BBBBB000 upper 5 */ \
10354b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q2, q0, #5                     \n"  /* B,G 00000BBB lower 3 */ \
10364b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q1, q1, q3                     \n"  /* R,A                  */ \
10374b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q0, q0, q2                     \n"  /* B,G                  */ \
10384b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Asm fragment: expands 8 RGB555 pixels to 8-bit-per-channel planes.
//   In:      q0 = 8 pixels, 16 bits each; the top bit of each pixel is
//            shifted out and never used.
//   Out:     d0 = B, d1 = G, d2 = R (one byte per pixel).
//   Scratch: d4, d5, d6.  d3 is not touched.
// The instruction sequence follows RGB565TOARGB, with the shift counts
// adjusted for a 5-bit green field.
1039522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
1040522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com#define RGB555TOARGB                                                           \
1041522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshrn.u16  d6, q0, #5                     \n"  /* G xxxGGGGG           */ \
1042522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB xRRRRRxx */ \
1043522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshl.u8    d6, d6, #3                     \n"  /* G GGGGG000 upper 5   */ \
1044522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshr.u8    d1, d1, #2                     \n"  /* R 00xRRRRR lower 5   */ \
1045522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
1046522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
1047522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
1048522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshr.u8    d4, d6, #5                     \n"  /* G 00000GGG lower 3   */ \
1049522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
1050522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
1051522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
// Converts a row of 16-bit ARGB1555 pixels to 4-byte ARGB pixels.
//   src_argb1555 - source row, 2 bytes per pixel (16 bytes read per
//                  iteration).
//   dst_argb     - destination row, 4 bytes per pixel (32 bytes written per
//                  iteration).
//   pix          - pixel count; 8 pixels are handled per iteration.
// The ARGB1555TOARGB fragment leaves B, G, R, A in d0, d1, d2, d3.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
10524b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
10534b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com                            int pix) {
10544b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  asm volatile (
    // NOTE(review): ARGB1555TOARGB writes q1 (d2 and d3) on every iteration,
    // so this 255 is replaced by the alpha derived from the source bit before
    // the store - the initial value appears to be unused.
10554b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vmov.u8    d3, #255                       \n"  // Alpha
1056c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
10574b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  "1:                                          \n"
10580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
10594b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
10604b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
10614b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    ARGB1555TOARGB
10620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
10634b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
10644b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "bgt        1b                             \n"
10654b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  : "+r"(src_argb1555),  // %0
10664b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(dst_argb),    // %1
10674b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(pix)          // %2
10684b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  :
10698f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
10704b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  );
10714b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com}
10724b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Asm fragment: expands 8 ARGB4444 pixels to 8-bit-per-channel planes.
//   In:      q0 = 8 pixels, 16 bits each.
//   Out:     d0 = B, d1 = G, d2 = R, d3 = A (one byte per pixel).
//   Scratch: q2 (d4, d5).
// Each 4-bit field is duplicated into both nibbles of its output byte
// (value | value << 4).  This fragment writes d3.
10734b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB4444TOARGB                                                         \
10744b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vuzp.u8    d0, d1                         \n"  /* d0 BG, d1 RA         */ \
10754b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    q2, q0, #4                     \n"  /* B,R BBBB0000         */ \
10764b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q1, q0, #4                     \n"  /* G,A 0000GGGG         */ \
10774b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q0, q2, #4                     \n"  /* B,R 0000BBBB         */ \
10784b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q0, q0, q2                     \n"  /* B,R BBBBBBBB         */ \
10794b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    q2, q1, #4                     \n"  /* G,A GGGG0000         */ \
10804b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q1, q1, q2                     \n"  /* G,A GGGGGGGG         */ \
10814b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vswp.u8    d1, d2                         \n"  /* B,R,G,A -> B,G,R,A   */
10824b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Converts a row of 16-bit ARGB4444 pixels to 4-byte ARGB pixels.
//   src_argb4444 - source row, 2 bytes per pixel (16 bytes read per
//                  iteration).
//   dst_argb     - destination row, 4 bytes per pixel (32 bytes written per
//                  iteration).
//   pix          - pixel count; 8 pixels are handled per iteration.
// The ARGB4444TOARGB fragment leaves B, G, R, A in d0, d1, d2, d3.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
10834b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
10844b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com                            int pix) {
10854b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  asm volatile (
    // NOTE(review): ARGB4444TOARGB writes q1 (d2 and d3) on every iteration,
    // so this 255 is replaced by the expanded source alpha before the store -
    // the initial value appears to be unused.
10864b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vmov.u8    d3, #255                       \n"  // Alpha
1087c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
10884b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  "1:                                          \n"
10890bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
10904b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
10914b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
10924b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    ARGB4444TOARGB
10930bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
10944b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
10954b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "bgt        1b                             \n"
10964b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  : "+r"(src_argb4444),  // %0
10974b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(dst_argb),    // %1
10984b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(pix)          // %2
10994b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  :
11008f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
11014b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  );
11024b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com}
11034b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Packs a row of 4-byte ARGB pixels into 3-byte RGB24 pixels.
//   src_argb  - source row, 4 bytes per pixel (32 bytes read per iteration).
//   dst_rgb24 - destination row, 3 bytes per pixel (24 bytes written per
//               iteration).
//   pix       - pixel count; 8 pixels are handled per iteration.
// The first three bytes of every pixel are copied in order; the fourth byte
// (loaded into d4) is dropped.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
110464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.comvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
110564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  asm volatile (
1106c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
110764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  "1:                                          \n"
11080bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld4 de-interleaves the four bytes of each pixel into d1..d4.
1109275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
111082069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
11110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
    // Only d1..d3 are stored; d4 is discarded.
11124807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RGB24.
111364961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com    "bgt        1b                             \n"
111464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  : "+r"(src_argb),   // %0
111564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com    "+r"(dst_rgb24),  // %1
111664961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com    "+r"(pix)         // %2
111764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  :
11188f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
111964961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  );
112064961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com}
112164961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com
// Packs a row of 4-byte ARGB pixels into 3-byte RAW pixels.
//   src_argb - source row, 4 bytes per pixel (32 bytes read per iteration).
//   dst_raw  - destination row, 3 bytes per pixel (24 bytes written per
//              iteration).
//   pix      - pixel count; 8 pixels are handled per iteration.
// Same as the RGB24 packing except that the first and third bytes of every
// pixel are exchanged before storing; the fourth byte (d4) is dropped.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 8 - confirm.
11225808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.comvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
11235808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  asm volatile (
1124c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
11255808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  "1:                                          \n"
11260bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1127275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
11284807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1129275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vswp.u8    d1, d3                         \n"  // swap R, B
11300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1131275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RAW.
11325808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com    "bgt        1b                             \n"
11335808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  : "+r"(src_argb),  // %0
11345808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com    "+r"(dst_raw),   // %1
11355808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com    "+r"(pix)        // %2
11365808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  :
11378f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
11385808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  );
11395808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com}
11405808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com
// Extracts the luma bytes from a row of packed YUY2.
//   src_yuy2 - source row, 2 bytes per pixel (32 bytes read per iteration).
//   dst_y    - destination Y row, 1 byte per pixel (16 bytes written per
//              iteration).
//   pix      - pixel count; 16 pixels are handled per iteration.
// The even-offset source bytes (0, 2, 4, ...) are kept as Y; the odd-offset
// bytes are discarded.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 16 - confirm.
1141dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
1142dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1143c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1144dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
11450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld2 de-interleaves: even bytes -> q0, odd bytes -> q1.
11462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of YUY2.
114782069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
11480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
11492c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 16 pixels of Y.
1150dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1151dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_yuy2),  // %0
1152dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_y),     // %1
1153dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %2
1154dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
11558f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
1156dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1157dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1158dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Extracts the luma bytes from a row of packed UYVY.
//   src_uyvy - source row, 2 bytes per pixel (32 bytes read per iteration).
//   dst_y    - destination Y row, 1 byte per pixel (16 bytes written per
//              iteration).
//   pix      - pixel count; 16 pixels are handled per iteration.
// The odd-offset source bytes (1, 3, 5, ...) are kept as Y; the even-offset
// bytes are discarded.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 16 - confirm.
1159dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
1160dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1161c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1162dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
11630bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld2 de-interleaves: even bytes -> q0, odd bytes -> q1.
11642c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of UYVY.
116582069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
11660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
11672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%1]!                    \n"  // store 16 pixels of Y.
1168dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1169dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_uyvy),  // %0
1170dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_y),     // %1
1171dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %2
1172dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
11738f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
1174dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1175dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1176dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Splits the chroma bytes of one packed YUY2 row into separate U and V rows
// (no vertical averaging).
//   src_yuy2 - source row, 2 bytes per pixel (32 bytes read per iteration).
//   dst_u    - destination U row (8 bytes written per iteration).
//   dst_v    - destination V row (8 bytes written per iteration).
//   pix      - pixel count; 16 pixels (8 U/V pairs) are handled per iteration.
// Within each 4-byte group, byte 1 is stored as U and byte 3 as V; bytes 0
// and 2 are discarded.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 16 - confirm.
1177dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
1178dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                         int pix) {
1179dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1180c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1181dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
11820bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld4 de-interleaves each 4-byte group into d0, d1, d2, d3.
1183dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
11844807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
11850bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
11862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // store 8 U.
11870bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
11882c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d3}, [%2]!                    \n"  // store 8 V.
1189dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1190dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_yuy2),  // %0
1191dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_u),     // %1
1192dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_v),     // %2
1193dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %3
1194dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
11958f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
1196dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1197dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1198dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Splits the chroma bytes of one packed UYVY row into separate U and V rows
// (no vertical averaging).
//   src_uyvy - source row, 2 bytes per pixel (32 bytes read per iteration).
//   dst_u    - destination U row (8 bytes written per iteration).
//   dst_v    - destination V row (8 bytes written per iteration).
//   pix      - pixel count; 16 pixels (8 U/V pairs) are handled per iteration.
// Within each 4-byte group, byte 0 is stored as U and byte 2 as V; bytes 1
// and 3 are discarded.
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 16 - confirm.
1199dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
1200dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                         int pix) {
1201dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1202c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1203dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
12040bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld4 de-interleaves each 4-byte group into d0, d1, d2, d3.
1205dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
12064807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
12070bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
12082c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 U.
12090bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
12102c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d2}, [%2]!                    \n"  // store 8 V.
1211dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1212dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_uyvy),  // %0
1213dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_u),     // %1
1214dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_v),     // %2
1215dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %3
1216dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
12178f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
1218dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1219dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1220dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Produces one U row and one V row from two vertically adjacent packed YUY2
// rows, averaging the chroma of the two rows.
//   src_yuy2    - first source row, 2 bytes per pixel.
//   stride_yuy2 - byte offset from src_yuy2 to the second source row.
//   dst_u       - destination U row (8 bytes written per iteration).
//   dst_v       - destination V row (8 bytes written per iteration).
//   pix         - pixel count; 16 pixels (8 U/V pairs) are handled per
//                 iteration, reading 32 bytes from each source row.
// U is byte 1 and V is byte 3 of each 4-byte group; the two rows are combined
// with vrhadd.u8 (rounding halving add).
// The counter is tested after the iteration, so the body runs at least once.
// NOTE(review): presumably callers pass a positive multiple of 16 - confirm.
1221dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
1222dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
1223dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
    // The register holding stride_yuy2 (%1) is overwritten with the address
    // of the second row and used as a pointer from here on.
1224dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "add        %1, %0, %1                     \n"  // stride + src_yuy2
1225c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1226dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
12270bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1228dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
12294807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
12300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1231dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row YUY2.
1232dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d1, d1, d5                     \n"  // average rows of U
1233dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d3, d3, d7                     \n"  // average rows of V
12340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
12352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 U.
12360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
12372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d3}, [%3]!                    \n"  // store 8 V.
    // Branches on the flags produced by the subs above.
1238dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1239cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com  : "+r"(src_yuy2),     // %0
1240dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(stride_yuy2),  // %1
1241cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_u),        // %2
1242cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_v),        // %3
1243cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(pix)           // %4
1244dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
12458f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
1246dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1247dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1248dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
1249dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
1250dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
1251dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1252dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "add        %1, %0, %1                     \n"  // stride + src_uyvy
1253c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1254dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
12550bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1256dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
12574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
12580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1259dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row UYVY.
1260dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d0, d0, d4                     \n"  // average rows of U
1261dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d2, d2, d6                     \n"  // average rows of V
12620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
12632c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 U.
12640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
12652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d2}, [%3]!                    \n"  // store 8 V.
1266dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1267cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com  : "+r"(src_uyvy),     // %0
1268dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(stride_uyvy),  // %1
1269cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_u),        // %2
1270cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_v),        // %3
1271cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(pix)           // %4
1272dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
12738f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
1274dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1275dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
12764807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
1277e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.comvoid HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
1278e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com                  uint8* dst_uv, int pix) {
1279e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com  asm volatile (
1280e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    // change the stride to row 2 pointer
1281e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "add        %1, %0                         \n"
1282f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com  "1:                                          \n"
12830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
12842c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load row 1 16 pixels.
1285e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
12860bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
12872c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // load row 2 16 pixels.
1288e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // average row 1 and 2
12890bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
12902c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"
1291e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "bgt        1b                             \n"
12923e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_uv),         // %0
12933e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_uv_stride),  // %1
12943e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_uv),         // %2
12953e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(pix)             // %3
12963e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
12978f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
12983e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
1299e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com}
1300e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com
13018d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
13021096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
13031096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com                         uint32 selector, int pix) {
1304f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com  asm volatile (
1305c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u32   d6[0], %3                      \n"  // selector
1306f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com  "1:                                          \n"
13070bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
13082c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 8 pixels.
1309c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop
1310c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vtbl.8     d4, {d0, d1}, d6               \n"  // look up 4 pixels
1311c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vtbl.8     d5, {d2, d3}, d6               \n"  // look up 4 pixels
1312c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vtrn.u32   d4, d5                         \n"  // combine 8 pixels
13130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
13142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d4}, [%1]!                    \n"  // store 8.
1315f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com    "bgt        1b                             \n"
13164f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com  : "+r"(src_argb),   // %0
13174f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com    "+r"(dst_bayer),  // %1
13184f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com    "+r"(pix)         // %2
13194f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com  : "r"(selector)     // %3
1320c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
13211096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  );
13221096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com}
13231096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com
132408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com// Select G channels from ARGB.  e.g.  GGGGGGGG
132508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.comvoid ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
132608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com                           uint32 /*selector*/, int pix) {
132708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  asm volatile (
132808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  "1:                                          \n"
13290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
133008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load row 8 pixels.
133108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop
13320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
133308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // store 8 G's.
133408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "bgt        1b                             \n"
133508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  : "+r"(src_argb),   // %0
133608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "+r"(dst_bayer),  // %1
133708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "+r"(pix)         // %2
133808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  :
133908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
134008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  );
134108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com}
134208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com
13431096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
13441096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
13451096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com                         const uint8* shuffler, int pix) {
13461096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  asm volatile (
13470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
13482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q2}, [%3]                     \n"  // shuffler
13491096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  "1:                                          \n"
13500bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
13512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 4 pixels.
13521096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "subs       %2, %2, #4                     \n"  // 4 processed per loop
13531096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "vtbl.8     d2, {d0, d1}, d4               \n"  // look up 2 first pixels
13541096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "vtbl.8     d3, {d0, d1}, d5               \n"  // look up 2 next pixels
13550bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
13562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%1]!                    \n"  // store 4.
13571096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "bgt        1b                             \n"
13581096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  : "+r"(src_argb),  // %0
13591096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "+r"(dst_argb),  // %1
13601096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "+r"(pix)        // %2
13611096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  : "r"(shuffler)    // %3
13621096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
13633e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
13648d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com}
13658d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com
13669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToYUY2Row_NEON(const uint8* src_y,
13679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
13689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
13699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_yuy2, int width) {
13709de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  asm volatile (
1371c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
13729de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  "1:                                          \n"
13730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
13749de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld2.8     {d0, d2}, [%0]!                \n"  // load 16 Ys
13750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
13769de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d1}, [%1]!                    \n"  // load 8 Us
13770bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
13789de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d3}, [%2]!                    \n"  // load 8 Vs
13799de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels
13800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
13812c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 YUY2/16 pixels.
13829de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "bgt        1b                             \n"
13833e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_y),     // %0
13843e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_u),     // %1
13853e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_v),     // %2
13863e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_yuy2),  // %3
13873e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)      // %4
13883e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
13893e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"
13909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  );
13919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com}
13929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com
13939de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToUYVYRow_NEON(const uint8* src_y,
13949de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
13959de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
13969de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_uyvy, int width) {
13979de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  asm volatile (
1398c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
13999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  "1:                                          \n"
14000bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
14019de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld2.8     {d1, d3}, [%0]!                \n"  // load 16 Ys
14020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
14039de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d0}, [%1]!                    \n"  // load 8 Us
14040bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
14059de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d2}, [%2]!                    \n"  // load 8 Vs
14069de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels
14070bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
14082c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 UYVY/16 pixels.
14099de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "bgt        1b                             \n"
14103e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_y),     // %0
14113e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_u),     // %1
14123e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_v),     // %2
14133e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_uyvy),  // %3
14143e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)      // %4
14153e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
14163e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"
14179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  );
14189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com}
14199de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com
14201bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
14211bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  asm volatile (
1422c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
14231bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  "1:                                          \n"
14240bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
142511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
14261bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
142711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTORGB565
14280bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
14291bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
14301bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "bgt        1b                             \n"
14311bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  : "+r"(src_argb),  // %0
14321bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(dst_rgb565),  // %1
14331bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(pix)        // %2
14341bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  :
14358f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
14361bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  );
14371bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com}
14381bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com
14391bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
14401bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com                            int pix) {
14411bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  asm volatile (
1442c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
14431bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  "1:                                          \n"
14440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
144511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
14461bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
144711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB1555
14480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
14491bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
14501bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "bgt        1b                             \n"
14511bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  : "+r"(src_argb),  // %0
14521bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(dst_argb1555),  // %1
14531bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(pix)        // %2
14541bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  :
14558f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
14561bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  );
14571bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com}
14581bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com
1459bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.comvoid ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
1460bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.com                            int pix) {
1461c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  asm volatile (
1462c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
1463c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1464c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  "1:                                          \n"
14650bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
146611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
1467c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
146811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB4444
14690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1470c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
1471c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "bgt        1b                             \n"
1472bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_argb),      // %0
1473c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "+r"(dst_argb4444),  // %1
1474bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)            // %2
1475c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  :
14768f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
1477c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  );
1478c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com}
14790908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com
14800908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.comvoid ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
14810908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  asm volatile (
1482bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
1483bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
1484bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
1485bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
1486c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1487bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
14880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
14891dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
1490bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1491bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
1492bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
1493bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
1494bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
1495bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
14960bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1497bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
1498bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
1499bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_argb),  // %0
1500bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),     // %1
1501bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
1502bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
15038f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
1504bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
1505bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
1506bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
1507cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.comvoid ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
1508cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  asm volatile (
1509050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
1510050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
1511050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
1512c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1513cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  "1:                                          \n"
15140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1515cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
1516cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1517cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
1518cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
1519cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
1520050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit Y
15210bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1522cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
1523cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "bgt        1b                             \n"
1524cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  : "+r"(src_argb),  // %0
1525cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "+r"(dst_y),     // %1
1526cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "+r"(pix)        // %2
1527cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  :
1528cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
1529cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  );
1530cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com}
1531cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com
1532c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 8x1 pixels.
// Converts a row of ARGB pixels to U and V with no chroma subsampling.
// Each loop iteration loads 8 ARGB pixels from src_argb and stores 8 U bytes
// to dst_u and 8 V bytes to dst_v, computed per pixel as
//   U = (112 * B - 74 * G - 38 * R + 0x8080) >> 8
//   V = (112 * R - 94 * G - 18 * B + 0x8080) >> 8
// pix is the number of ARGB pixels. It is decremented by 8 per iteration and
// the loop body runs before the count is tested, so a full group of 8 pixels
// is always processed; pix is presumably a positive multiple of 8 - TODO
// confirm against callers.
1533c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.comvoid ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1534c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com                         int pix) {
1535c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  asm volatile (
    // d24..d28 hold the 8 bit coefficient magnitudes; the negative terms are
    // applied with vmlsl (multiply-subtract) in the loop.
1536c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d24, #112                      \n"  // UB / VR 0.875 coefficient
1537c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d25, #74                       \n"  // UG -0.5781 coefficient
1538c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d26, #38                       \n"  // UR -0.2969 coefficient
1539c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d27, #18                       \n"  // VB -0.1406 coefficient
1540c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d28, #94                       \n"  // VG -0.7344 coefficient
1541c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1542c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1543c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  "1:                                          \n"
15440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
    // vld4.8 de-interleaves the 4 byte pixels: d0 = B, d1 = G, d2 = R.
    // d3 (presumably alpha) is loaded but not used.
1545c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
1546c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
    // U. The unsigned 16 bit lanes may wrap while subtracting; adding the
    // 0x8080 bias in q15 brings the sum back into the 0..65535 range.
1547c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
1548c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q2, d1, d25                    \n"  // G
1549c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q2, d2, d26                    \n"  // R
1550c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vadd.u16   q2, q2, q15                    \n"  // +128 -> unsigned
1551c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
    // V, same scheme with the R coefficient positive.
1552c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmull.u8   q3, d2, d24                    \n"  // R
1553c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q3, d1, d28                    \n"  // G
1554c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q3, d0, d27                    \n"  // B
1555c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vadd.u16   q3, q3, q15                    \n"  // +128 -> unsigned
1556c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
    // Keep the high byte of each 16 bit lane (>> 8) with unsigned saturation.
1557c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d0, q2, #8                    \n"  // 16 bit to 8 bit U
1558c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d1, q3, #8                    \n"  // 16 bit to 8 bit V
1559c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
15600bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1561c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
15620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
1563c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
1564c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0.
  // All four operands are read-write: the pointers are post-incremented and
  // pix is counted down by the asm.
1565c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  : "+r"(src_argb),  // %0
1566c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "+r"(dst_u),     // %1
1567c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "+r"(dst_v),     // %2
1568c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "+r"(pix)        // %3
1569c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  :
  // NOTE(review): q4 is listed as clobbered although the asm above does not
  // reference it; harmless, but could be dropped.
15708f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
1571c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  );
1572c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com}
1573c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1574c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
// Converts a row of ARGB pixels to U and V subsampled 2:1 horizontally.
// Each loop iteration loads 16 ARGB pixels from src_argb, sums each pair of
// horizontally adjacent pixels per channel, and stores 8 U bytes to dst_u and
// 8 V bytes to dst_v. The coefficients are loaded at half scale (#112 / 2,
// ...) and applied to the 2 pixel sums, which gives the same result as the
// full coefficient applied to the 2 pixel average.
// pix is decremented by 16 per iteration and the loop body runs before the
// count is tested; pix is presumably a positive multiple of 16 - TODO confirm
// against callers.
1575c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.comvoid ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1576c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com                         int pix) {
1577c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  asm volatile (
    // q10..q14 hold the coefficient magnitudes in 16 bit lanes; the negative
    // terms are applied with vmls (multiply-subtract) in the loop.
1578c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
1579c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
1580c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
1581c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
158276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
1583c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1584c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1585c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  "1:                                          \n"
    // The two loads fill the low and high halves of q0 (B), q1 (G), q2 (R)
    // and q3 (presumably alpha, unused), giving 16 bytes per channel.
15860bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1587c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
15880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1589c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
1590c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
    // Pairwise add long: each 16 bit lane becomes the sum of 2 adjacent pixels.
1591c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
1592c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
1593c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
1594c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1595c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
    // U = 56 * sumB - 37 * sumG - 19 * sumR + 0x8080 (16 bit lanes).
1596c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmul.s16   q8, q0, q10                    \n"  // B
1597c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q8, q1, q11                    \n"  // G
1598c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q8, q2, q12                    \n"  // R
1599c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
1600c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
    // V = 56 * sumR - 47 * sumG - 9 * sumB + 0x8080 (16 bit lanes).
1601c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmul.s16   q9, q2, q10                    \n"  // R
1602c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q9, q1, q14                    \n"  // G
1603c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q9, q0, q13                    \n"  // B
1604c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
1605c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
    // Keep the high byte of each 16 bit lane (>> 8) with unsigned saturation.
    // d0/d1 are reused for the results; q0 is no longer needed as input here.
1606c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
1607c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
1608c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
16090bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1610c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
16110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
1612c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
1613c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0.
  // All four operands are read-write: the pointers are post-incremented and
  // pix is counted down by the asm.
1614c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  : "+r"(src_argb),  // %0
1615c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "+r"(dst_u),     // %1
1616c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "+r"(dst_v),     // %2
1617c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "+r"(pix)        // %3
1618c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  :
16198f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3",
1620c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1621c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  );
1622c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com}
1623c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
162476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
// Converts a row of ARGB pixels to U and V subsampled 4:1 horizontally.
// Each loop iteration loads 32 ARGB pixels from src_argb, sums each group of
// 4 horizontally adjacent pixels per channel, halves the sums with rounding,
// and stores 8 U bytes to dst_u and 8 V bytes to dst_v. The coefficients are
// loaded at half scale (#112 / 2, ...), so together with the halved sums the
// result corresponds to the full coefficient applied to the 4 pixel average.
// pix is decremented by 32 per iteration and the loop body runs before the
// count is tested; pix is presumably a positive multiple of 32 - TODO confirm
// against callers.
162576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.comvoid ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
162676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com                         int pix) {
162776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  asm volatile (
    // q10..q14 hold the coefficient magnitudes in 16 bit lanes; the negative
    // terms are applied with vmls (multiply-subtract) in the loop.
162806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
162906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
163006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
163106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
163206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
163376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1634c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
163576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  "1:                                          \n"
    // First 16 pixels: q0 = B, q1 = G, q2 = R, q3 = presumably alpha (unused).
16360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
163776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
16380bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
163976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
164076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
164176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
164276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
    // Second 16 pixels: q4 = B, q5 = G, q6 = R, q7 = presumably alpha (unused).
16430bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
164476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%0]!     \n"  // load 8 more ARGB pixels.
16450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
164676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%0]!     \n"  // load last 8 ARGB pixels.
164776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q4, q4                         \n"  // B 16 bytes -> 8 shorts.
164876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q5, q5                         \n"  // G 16 bytes -> 8 shorts.
164976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q6, q6                         \n"  // R 16 bytes -> 8 shorts.
165006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // Pairwise add the 2 pixel sums so each 16 bit lane is the sum of 4
    // adjacent pixels. The low half of each result register comes from the
    // first 16 pixels and the high half from the second 16.
165176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d0, d0, d1                     \n"  // B 16 shorts -> 8 shorts.
165276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d1, d8, d9                     \n"  // B
165376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d2, d2, d3                     \n"  // G 16 shorts -> 8 shorts.
165476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d3, d10, d11                   \n"  // G
165576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d4, d4, d5                     \n"  // R 16 shorts -> 8 shorts.
165676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d5, d12, d13                   \n"  // R
165706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // Rounding shift right by 1: 4 pixel sum -> twice the 4 pixel average.
165806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
165906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
166006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
166106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
166276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "subs       %3, %3, #32                    \n"  // 32 processed per loop.
    // U into q8 and V into q9, then bias and narrow to 8 bits.
166376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmul.s16   q8, q0, q10                    \n"  // B
166476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q8, q1, q11                    \n"  // G
166576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q8, q2, q12                    \n"  // R
166676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
166776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmul.s16   q9, q2, q10                    \n"  // R
166876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q9, q1, q14                    \n"  // G
166976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q9, q0, q13                    \n"  // B
167076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
167176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
167276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
16730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
167476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
16750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
167676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
167776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0.
  // All four operands are read-write: the pointers are post-incremented and
  // pix is counted down by the asm.
167876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  : "+r"(src_argb),  // %0
167976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "+r"(dst_u),     // %1
168076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "+r"(dst_v),     // %2
168176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "+r"(pix)        // %3
168276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  :
16838f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
168476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
168576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  );
168676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com}
168776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com
1688dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
// RGBTOUV(QB, QG, QR) expands to the asm text that turns three q registers of
// 16 bit B, G and R values (named by the macro arguments) into 8 U bytes in
// d0 and 8 V bytes in d1.
// Register contract, as used by the instructions below:
//   q10 = coefficient applied to B for U and to R for V
//   q11 = U coefficient subtracted for G, q12 = U coefficient subtracted for R
//   q14 = V coefficient subtracted for G, q13 = V coefficient subtracted for B
//   q15 = bias added before narrowing
//   q8, q9 = scratch (U and V accumulators)
// d0/d1 (q0) are only written by the final two instructions, after all reads
// of QB/QG/QR, so q0 may be passed as one of the inputs.
168995730719503137a7db61a105bec02220f9ed159efbarchard@google.com#define RGBTOUV(QB, QG, QR) \
169095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmul.s16   q8, " #QB ", q10               \n"  /* B                    */ \
169195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q8, " #QG ", q11               \n"  /* G                    */ \
169295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q8, " #QR ", q12               \n"  /* R                    */ \
1693522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  /* +128 -> unsigned     */ \
169495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmul.s16   q9, " #QR ", q10               \n"  /* R                    */ \
169595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q9, " #QG ", q14               \n"  /* G                    */ \
169695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q9, " #QB ", q13               \n"  /* B                    */ \
1697522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  /* +128 -> unsigned     */ \
1698522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  /* 16 bit to 8 bit U    */ \
1699522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  /* 16 bit to 8 bit V    */
1700522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
170106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
// Converts two rows of ARGB pixels to one row of U and V subsampled 2x2.
// Each loop iteration loads 16 ARGB pixels from the row at src_argb and 16
// from the row at src_argb + src_stride_argb, sums each 2x2 group per channel,
// halves the sums with rounding, and stores 8 U bytes to dst_u and 8 V bytes
// to dst_v. The coefficients are loaded at half scale (#112 / 2, ...), so
// together with the halved sums the result corresponds to the full coefficient
// applied to the 2x2 average.
// pix is the number of ARGB pixels per row. It is decremented by 16 per
// iteration and the loop body runs before the count is tested; pix is
// presumably a positive multiple of 16 - TODO confirm against callers.
1702dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.comvoid ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
1703dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
170476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  asm volatile (
    // Operand %1 enters as the byte stride and is turned into the address of
    // the second row. NOTE(review): this reuses an int operand as a pointer,
    // which presumably relies on 32 bit ARM register width - confirm.
1705dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "add        %1, %0, %1                     \n"  // src_stride + src_argb
170606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
170706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
170806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
170906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
171006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
171176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1712c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
171376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  "1:                                          \n"
    // First row: q0 = B, q1 = G, q2 = R, q3 = presumably alpha (unused).
17140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1715dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
17160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1717dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
1718dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
1719dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
1720dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
    // Second row: q4 = B, q5 = G, q6 = R, q7 = presumably alpha (unused).
17210bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1722dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 ARGB pixels of 2nd row.
17230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1724dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load next 8 of 2nd row.
    // Pairwise add and accumulate: each lane becomes the sum of a 2x2 group.
1725dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
1726dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
1727dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
172806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // Rounding shift right by 1: 2x2 sum -> twice the 2x2 average.
172906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
173006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
173106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
173206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
1733dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 per row (32 total) processed per loop.
    // RGBTOUV leaves 8 U bytes in d0 and 8 V bytes in d1.
173495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q0, q1, q2)
17350bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
1736dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
17370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
1738dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
173976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0.
  // All operands are read-write: the pointers are post-incremented, %1 is
  // rewritten to the second row address, and pix is counted down by the asm.
174076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  : "+r"(src_argb),  // %0
1741dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(src_stride_argb),  // %1
1742dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(dst_u),     // %2
1743dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(dst_v),     // %3
1744dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(pix)        // %4
174576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  :
17468f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
174776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
174876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  );
174976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com}
175095730719503137a7db61a105bec02220f9ed159efbarchard@google.com
1751050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// TODO(fbarchard): Subsample match C code.
// Converts two rows of ARGB pixels to one row of U and V subsampled 2x2,
// using the coefficient set 127 / 84 / 43 / 20 / 107 loaded below.
// Each loop iteration loads 16 ARGB pixels from the row at src_argb and 16
// from the row at src_argb + src_stride_argb, sums each 2x2 group per channel,
// halves the sums with rounding, and stores 8 U bytes to dst_u and 8 V bytes
// to dst_v. The coefficients are loaded at half scale (#127 / 2, ...).
// NOTE(review): 127, 43 and 107 are odd, so the "/ 2" presumably truncates in
// the assembler (63, 21, 53) - confirm this matches the intended precision.
// pix is the number of ARGB pixels per row. It is decremented by 16 per
// iteration and the loop body runs before the count is tested; pix is
// presumably a positive multiple of 16 - TODO confirm against callers.
1752050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.comvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
1753050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com                       uint8* dst_u, uint8* dst_v, int pix) {
1754050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  asm volatile (
    // Operand %1 enters as the byte stride and is turned into the address of
    // the second row. NOTE(review): this reuses an int operand as a pointer,
    // which presumably relies on 32 bit ARM register width - confirm.
1755050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "add        %1, %0, %1                     \n"  // src_stride + src_argb
175606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #127 / 2                  \n"  // UB / VR 0.500 coefficient
175706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #84 / 2                   \n"  // UG -0.33126 coefficient
175806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #43 / 2                   \n"  // UR -0.16874 coefficient
175906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #20 / 2                   \n"  // VB -0.08131 coefficient
176006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #107 / 2                  \n"  // VG -0.41869 coefficient
1761050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1762c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1763050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  "1:                                          \n"
    // First row: q0 = B, q1 = G, q2 = R, q3 = presumably alpha (unused).
17640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1765050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
17660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
1767050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
1768050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
1769050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
1770050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
    // Second row: q4 = B, q5 = G, q6 = R, q7 = presumably alpha (unused).
17710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1772050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 ARGB pixels of 2nd row.
17730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
1774050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load next 8 of 2nd row.
    // Pairwise add and accumulate: each lane becomes the sum of a 2x2 group.
1775050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
1776050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
1777050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
177806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // Rounding shift right by 1: 2x2 sum -> twice the 2x2 average.
177906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
178006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
178106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
178206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
1783050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 per row (32 total) processed per loop.
    // RGBTOUV leaves 8 U bytes in d0 and 8 V bytes in d1.
1784050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    RGBTOUV(q0, q1, q2)
17850bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
1786050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
17870bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
1788050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
1789050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0.
  // All operands are read-write: the pointers are post-incremented, %1 is
  // rewritten to the second row address, and pix is counted down by the asm.
1790050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  : "+r"(src_argb),  // %0
1791050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(src_stride_argb),  // %1
1792050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(dst_u),     // %2
1793050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(dst_v),     // %3
1794050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(pix)        // %4
1795050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  :
1796050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1797050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1798050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  );
1799050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com}
1800050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com
180195730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
180295730719503137a7db61a105bec02220f9ed159efbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
180395730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
180495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // src_stride + src_bgra
180506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
180606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
180706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
180806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
180906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
181095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1811c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
181295730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
18130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
181495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 BGRA pixels.
18150bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
181695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 BGRA pixels.
181795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q3, q3                         \n"  // B 16 bytes -> 8 shorts.
181895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // G 16 bytes -> 8 shorts.
181995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // R 16 bytes -> 8 shorts.
18200bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
182195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more BGRA pixels.
18220bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
182395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 BGRA pixels.
182495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q3, q7                         \n"  // B 16 bytes -> 8 shorts.
182595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // G 16 bytes -> 8 shorts.
182695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // R 16 bytes -> 8 shorts.
182706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
182806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"  // 2x average
182906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
183006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q3, q3, #1                     \n"
183106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
183295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
183395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q3, q2, q1)
18340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
183595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
18360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
183795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
183895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"
183995730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_bgra),  // %0
184095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_bgra),  // %1
184195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
184295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
184395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
184495730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
18458f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
184695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
184795730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
184895730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
184995730719503137a7db61a105bec02220f9ed159efbarchard@google.com
// 16x2 pixels -> 8x1.  pix is number of abgr pixels. e.g. 16.
// Each loop iteration reads 16 four-byte pixels from the row at src_abgr and
// 16 from the row at src_abgr + src_stride_abgr, sums every 2x2 group per
// channel, and stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is reduced by 16 per iteration and the loop repeats while the result is
// still positive, so the body always runs at least once and a pix that is not
// a multiple of 16 is effectively rounded up.
// RGBTOUV and MEMACCESS are macros defined elsewhere in this file; RGBTOUV is
// presumably what leaves U in d0 and V in d1 for the stores below.
185095730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
185195730719503137a7db61a105bec02220f9ed159efbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
185295730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
185395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_abgr + src_stride (row 2)
    // Coefficients are written as n / 2: the channel values passed to RGBTOUV
    // are 2x averages (see the vrshr instructions below).
185406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
185506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
185606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
185706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
185806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
185995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 (0x80.80 as 8.8 fixed point)
1860c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
186195730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
18620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
186395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 ABGR pixels.
18640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
186595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 ABGR pixels.
186695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
186795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
186895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
18690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
187095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 ABGR pixels.
18710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
187295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 ABGR pixels.
187395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // B += row 2 pairwise sums.
187495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G += row 2 pairwise sums.
187595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // R += row 2 pairwise sums.
187606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
187706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (rounded sum of 4 / 2)
187806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
187906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
188006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
188195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per loop (x2 rows = 32 read).
188295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q2, q1, q0)
18830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
188495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
18850bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
188695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
188795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while flags from subs say pix > 0.
188895730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_abgr),  // %0
188995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_abgr),  // %1 (overwritten with the row 2 pointer)
189095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
189195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
189295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
189395730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
18948f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
189595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
189695730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
189795730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
189895730719503137a7db61a105bec02220f9ed159efbarchard@google.com
// 16x2 pixels -> 8x1.  pix is number of rgba pixels. e.g. 16.
// Each loop iteration reads 16 four-byte pixels from the row at src_rgba and
// 16 from the row at src_rgba + src_stride_rgba, sums every 2x2 group per
// channel, and stores 8 bytes to dst_u and 8 bytes to dst_v.
// The first de-interleaved channel (q0 / q4) is never read: q0 is reused as
// the destination for the B sums.
// pix is reduced by 16 per iteration and the loop repeats while the result is
// still positive, so the body always runs at least once and a pix that is not
// a multiple of 16 is effectively rounded up.
// RGBTOUV and MEMACCESS are macros defined elsewhere in this file; RGBTOUV is
// presumably what leaves U in d0 and V in d1 for the stores below.
189995730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
190095730719503137a7db61a105bec02220f9ed159efbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
190195730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
190295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_rgba + src_stride (row 2)
    // Coefficients are written as n / 2: the channel values passed to RGBTOUV
    // are 2x averages (see the vrshr instructions below).
190306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
190406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
190506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
190606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
190706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
190895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 (0x80.80 as 8.8 fixed point)
1909c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
191095730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
19110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
191295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 RGBA pixels.
19130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
191495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 RGBA pixels.
191595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q1                         \n"  // B 16 bytes -> 8 shorts (into q0).
191695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q2                         \n"  // G 16 bytes -> 8 shorts (into q1).
191795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q3                         \n"  // R 16 bytes -> 8 shorts (into q2).
19180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
191995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 RGBA pixels.
19200bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
192195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 RGBA pixels.
192295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q5                         \n"  // B += row 2 pairwise sums.
192395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q6                         \n"  // G += row 2 pairwise sums.
192495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q7                         \n"  // R += row 2 pairwise sums.
192506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
192606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (rounded sum of 4 / 2)
192706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
192806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
192906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
193095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per loop (x2 rows = 32 read).
193195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q0, q1, q2)
19320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
193395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
19340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
193595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
193695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while flags from subs say pix > 0.
193795730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_rgba),  // %0
193895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_rgba),  // %1 (overwritten with the row 2 pointer)
193995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
194095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
194195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
194295730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
19438f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
194495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
194595730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
194695730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
194795730719503137a7db61a105bec02220f9ed159efbarchard@google.com
// 16x2 pixels -> 8x1.  pix is number of rgb24 pixels. e.g. 16.
// Each loop iteration reads 16 three-byte pixels from the row at src_rgb24
// and 16 from the row at src_rgb24 + src_stride_rgb24, sums every 2x2 group
// per channel, and stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is reduced by 16 per iteration and the loop repeats while the result is
// still positive, so the body always runs at least once and a pix that is not
// a multiple of 16 is effectively rounded up.
// RGBTOUV and MEMACCESS are macros defined elsewhere in this file; RGBTOUV is
// presumably what leaves U in d0 and V in d1 for the stores below.
194895730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
194995730719503137a7db61a105bec02220f9ed159efbarchard@google.com                       uint8* dst_u, uint8* dst_v, int pix) {
195095730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
195195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_rgb24 + src_stride (row 2)
    // Coefficients are written as n / 2: the channel values passed to RGBTOUV
    // are 2x averages (see the vrshr instructions below).
195206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
195306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
195406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
195506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
195606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
195795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 (0x80.80 as 8.8 fixed point)
1958c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
195995730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
19600bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
196195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // row 1: load 8 RGB24 pixels.
19620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
196395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // row 1: load next 8 RGB24 pixels.
196495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
196595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
196695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
19670bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
196895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // row 2: load 8 RGB24 pixels.
19690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
197095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // row 2: load next 8 RGB24 pixels.
197195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // B += row 2 pairwise sums.
197295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G += row 2 pairwise sums.
197395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // R += row 2 pairwise sums.
197406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
197506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (rounded sum of 4 / 2)
197606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
197706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
197806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
197995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per loop (x2 rows = 32 read).
198095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q0, q1, q2)
19810bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
198295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
19830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
198495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
198595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while flags from subs say pix > 0.
198695730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_rgb24),  // %0
198795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_rgb24),  // %1 (overwritten with the row 2 pointer)
198895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
198995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
199095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
199195730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
19928f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
199395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
199495730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
199595730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
199695730719503137a7db61a105bec02220f9ed159efbarchard@google.com
// 16x2 pixels -> 8x1.  pix is number of raw pixels. e.g. 16.
// Each loop iteration reads 16 three-byte pixels from the row at src_raw and
// 16 from the row at src_raw + src_stride_raw, sums every 2x2 group per
// channel, and stores 8 bytes to dst_u and 8 bytes to dst_v.
// The per-line comments label q2 / q1 / q0 as B / G / R, the reverse of the
// register order used by RGB24ToUVRow_NEON.
// pix is reduced by 16 per iteration and the loop repeats while the result is
// still positive, so the body always runs at least once and a pix that is not
// a multiple of 16 is effectively rounded up.
// RGBTOUV and MEMACCESS are macros defined elsewhere in this file; RGBTOUV is
// presumably what leaves U in d0 and V in d1 for the stores below.
199795730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
199895730719503137a7db61a105bec02220f9ed159efbarchard@google.com                     uint8* dst_u, uint8* dst_v, int pix) {
199995730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
200095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_raw + src_stride (row 2)
    // Coefficients are written as n / 2: the channel values passed to RGBTOUV
    // are 2x averages (see the vrshr instructions below).
200106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
200206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
200306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
200406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
200506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
200695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 (0x80.80 as 8.8 fixed point)
2007c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
200895730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
20090bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
201095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // row 1: load 8 RAW pixels.
20110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
201295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // row 1: load next 8 RAW pixels.
201395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
201495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
201595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
20160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
201795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // row 2: load 8 RAW pixels.
20180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
201995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // row 2: load next 8 RAW pixels.
202095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // B += row 2 pairwise sums.
202195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G += row 2 pairwise sums.
202295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // R += row 2 pairwise sums.
202306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
202406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (rounded sum of 4 / 2)
202506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
202606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
202706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
202895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per loop (x2 rows = 32 read).
202995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q2, q1, q0)
20300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
203195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
20320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
203395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
203495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while flags from subs say pix > 0.
203595730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_raw),  // %0
203695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_raw),  // %1 (overwritten with the row 2 pointer)
203795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
203895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
203995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
204095730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
20418f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
204295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
204395730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
204495730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
204595730719503137a7db61a105bec02220f9ed159efbarchard@google.com
2046f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of rgb565 pixels. e.g. 16.
// Each loop iteration loads two 16-byte groups from the row at src_rgb565 and
// two from the row at src_rgb565 + src_stride_rgb565, expands each group with
// RGB565TOARGB, sums every 2x2 group per channel, and stores 8 bytes to dst_u
// and 8 bytes to dst_v.
// RGB565TOARGB and MEMACCESS are macros defined elsewhere in this file; the
// per-line comments below take d0 / d1 / d2 to be B / G / R after
// RGB565TOARGB.
// pix is reduced by 16 per iteration and the loop repeats while the result is
// still positive, so the body always runs at least once and a pix that is not
// a multiple of 16 is effectively rounded up.
2047f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.comvoid RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
2048f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com                        uint8* dst_u, uint8* dst_v, int pix) {
2049f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  asm volatile (
2050f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_rgb565 + src_stride (row 2)
    // Coefficients are written as n / 2: the channel values multiplied below
    // are 2x averages (see the vrshr instructions).
205106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
205206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
205306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
205406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
205506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
2056f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 (0x80.80 as 8.8 fixed point)
2057c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2058f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  "1:                                          \n"
20590bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2060f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // row 1: load 8 RGB565 pixels.
2061f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
2062f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
2063f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
2064f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
20650bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2066f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // row 1: next 8 RGB565 pixels.
2067f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
2068f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
2069f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
2070f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
2071f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com
20720bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2073f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // row 2: load 8 RGB565 pixels.
2074f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
2075f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d8, d0                         \n"  // B += row 2 pairwise sums.
2076f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d10, d1                        \n"  // G += row 2 pairwise sums.
2077f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d12, d2                        \n"  // R += row 2 pairwise sums.
20780bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2079f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // row 2: next 8 RGB565 pixels.
2080f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
2081f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d9, d0                         \n"  // B += row 2 pairwise sums.
2082f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d11, d1                        \n"  // G += row 2 pairwise sums.
2083f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d13, d2                        \n"  // R += row 2 pairwise sums.
2084f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com
208506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q4, q4, #1                     \n"  // 2x average (rounded sum of 4 / 2)
208606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q5, q5, #1                     \n"
208706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q6, q6, #1                     \n"
208806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
2089f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
    // U = B * q10 - G * q11 - R * q12 + q15, then the high byte is kept.
2090f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmul.s16   q8, q4, q10                    \n"  // B
2091f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q8, q5, q11                    \n"  // G
2092f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q8, q6, q12                    \n"  // R
2093f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
    // V = R * q10 - G * q14 - B * q13 + q15, then the high byte is kept.
2094f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmul.s16   q9, q6, q10                    \n"  // R
2095f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q9, q5, q14                    \n"  // G
2096f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q9, q4, q13                    \n"  // B
2097f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
2098f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
2099f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
21000bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
2101f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
21020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
2103f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
2104f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "bgt        1b                             \n"  // loop while flags from subs say pix > 0.
2105f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  : "+r"(src_rgb565),  // %0
2106f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(src_stride_rgb565),  // %1 (overwritten with the row 2 pointer)
2107f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(dst_u),     // %2
2108f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(dst_v),     // %3
2109f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(pix)        // %4
2110f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  :
21118f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
2112f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
2113f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  );
2114f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com}
2115522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
2116522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
// Reads two rows of 16-bit ARGB1555 pixels (src_argb1555 and src_argb1555 +
// src_stride_argb1555 bytes) and writes one U and one V byte per 2x2 block.
// Each loop pass consumes 16 pixels (32 bytes) from each row and stores 8
// bytes to dst_u and 8 bytes to dst_v.  The count is tested after the body
// (subs/bgt), so a full 16 pixel pass always runs at least once and the last
// pass is never shortened when pix is not a multiple of 16.
// With B, G, R being the rounded half-sums of the four samples in a block:
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// The coefficients are written halved (e.g. 112 / 2) because the sums are
// only reduced to 2x the block average.
// RGB555TOARGB and MEMACCESS are macros defined outside this section.
// NOTE(review): RGB555TOARGB presumably leaves B in d0, G in d1 and R in d2,
// as the inline labels suggest - confirm against its definition.
2117522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
2118522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com                        uint8* dst_u, uint8* dst_v, int pix) {
2119522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  asm volatile (
2120522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "add        %1, %0, %1                     \n"  // src_stride + src_argb
212106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
212206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
212306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
212406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
212506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
2126522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
2127c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2128522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  "1:                                          \n"
    // First row: vpaddl sums horizontally adjacent bytes into 16 bit lanes.
21290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2130522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
2131522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
2132522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
2133522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
2134522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
21350bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2136522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB1555 pixels.
2137522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
2138522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
2139522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
2140522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
2141522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
    // Second row: vpadal adds its pairwise sums onto the first row's sums, so
    // every 16 bit lane ends up holding the total of one 2x2 block.
21420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2143522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB1555 pixels.
2144522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
2145522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
2146522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
2147522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
21480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2149522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB1555 pixels.
2150522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
2151522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
2152522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
2153522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
2154522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
    // Rounding shift by 1 turns each 4 sample total into 2x the average; the
    // halved coefficients above account for the remaining factor of 2.
215506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
215606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q5, q5, #1                     \n"
215706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q6, q6, #1                     \n"
215806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // The flags set by subs are consumed by the bgt at the end of the loop.
2159522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
2160522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q8, q4, q10                    \n"  // B
2161522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q5, q11                    \n"  // G
2162522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q6, q12                    \n"  // R
2163522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
2164522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q9, q6, q10                    \n"  // R
2165522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q5, q14                    \n"  // G
2166522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q4, q13                    \n"  // B
2167522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
2168522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
2169522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
21700bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
2171522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
21720bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
2173522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
2174522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "bgt        1b                             \n"
2175522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  : "+r"(src_argb1555),  // %0
2176522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(src_stride_argb1555),  // %1
2177522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_u),     // %2
2178522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_v),     // %3
2179522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(pix)        // %4
2180522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  :
21818f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
2182522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
2183522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  );
2184522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com}
2185522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
2186522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
// Reads two rows of 16-bit ARGB4444 pixels (src_argb4444 and src_argb4444 +
// src_stride_argb4444 bytes) and writes one U and one V byte per 2x2 block.
// Each loop pass consumes 16 pixels (32 bytes) from each row and stores 8
// bytes to dst_u and 8 bytes to dst_v.  The count is tested after the body
// (subs/bgt), so a full 16 pixel pass always runs at least once and the last
// pass is never shortened when pix is not a multiple of 16.
// With B, G, R being the rounded half-sums of the four samples in a block:
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// The coefficients are written halved (e.g. 112 / 2) because the sums are
// only reduced to 2x the block average.
// ARGB4444TOARGB and MEMACCESS are macros defined outside this section.
// NOTE(review): ARGB4444TOARGB presumably leaves B in d0, G in d1 and R in
// d2, as the inline labels suggest - confirm against its definition.
2187522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
2188522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com                          uint8* dst_u, uint8* dst_v, int pix) {
2189522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  asm volatile (
2190522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "add        %1, %0, %1                     \n"  // src_stride + src_argb
219106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
219206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
219306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
219406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
219506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
2196522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
2197c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2198522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  "1:                                          \n"
    // First row: vpaddl sums horizontally adjacent bytes into 16 bit lanes.
21990bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2200522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
2201522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
2202522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
2203522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
2204522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
22050bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2206522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB4444 pixels.
2207522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
2208522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
2209522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
2210522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
2211522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
    // Second row: vpadal adds its pairwise sums onto the first row's sums, so
    // every 16 bit lane ends up holding the total of one 2x2 block.
22120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2213522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB4444 pixels.
2214522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
2215522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
2216522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
2217522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
22180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2219522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB4444 pixels.
2220522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
2221522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
2222522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
2223522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
2224522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
    // Rounding shift by 1 turns each 4 sample total into 2x the average; the
    // halved coefficients above account for the remaining factor of 2.
222506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
222606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q5, q5, #1                     \n"
222706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q6, q6, #1                     \n"
222806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // The flags set by subs are consumed by the bgt at the end of the loop.
2229522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
2230522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q8, q4, q10                    \n"  // B
2231522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q5, q11                    \n"  // G
2232522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q6, q12                    \n"  // R
2233522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
2234522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q9, q6, q10                    \n"  // R
2235522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q5, q14                    \n"  // G
2236522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q4, q13                    \n"  // B
2237522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
2238522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
2239522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
22400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
2241522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
22420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
2243522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
2244522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "bgt        1b                             \n"
2245522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  : "+r"(src_argb4444),  // %0
2246522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(src_stride_argb4444),  // %1
2247522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_u),     // %2
2248522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_v),     // %3
2249522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(pix)        // %4
2250522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  :
22518f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
2252522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
2253522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  );
2254522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com}
2255f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com
// Converts a row of 16-bit RGB565 pixels to 8-bit Y.
// Each loop pass loads 16 bytes (8 pixels) from src_rgb565 and stores 8 bytes
// to dst_y, computing per pixel
//   Y = sat8(round((13 * B + 65 * G + 33 * R) >> 7)) + 16   (saturating add).
// The count is tested after the body (subs/bgt), so a full 8 pixel pass
// always runs at least once and the last pass is never shortened when pix is
// not a multiple of 8.
// RGB565TOARGB and MEMACCESS are macros defined outside this section.
// NOTE(review): RGB565TOARGB presumably leaves B in d0, G in d1 and R in d2,
// as the inline labels suggest - confirm against its definition.
2256bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
2257bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2258bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
2259bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
2260bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
2261bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
2262c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2263bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
22640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
22651dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
    // The flags set by subs are consumed by the bgt at the end of the loop.
2266bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2267bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    RGB565TOARGB
    // Widening multiply-accumulate into 16 bit lanes of q2.
2268bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
2269bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
2270bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
2271bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
2272bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
22730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2274bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2275bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2276bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgb565),  // %0
2277bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),       // %1
2278bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)          // %2
2279bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
22808f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
2281bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2282bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2283bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 16-bit ARGB1555 pixels to 8-bit Y.
// Each loop pass loads 16 bytes (8 pixels) from src_argb1555 and stores 8
// bytes to dst_y, computing per pixel
//   Y = sat8(round((13 * B + 65 * G + 33 * R) >> 7)) + 16   (saturating add).
// The count is tested after the body (subs/bgt), so a full 8 pixel pass
// always runs at least once and the last pass is never shortened when pix is
// not a multiple of 8.
// ARGB1555TOARGB and MEMACCESS are macros defined outside this section.
// NOTE(review): ARGB1555TOARGB presumably leaves B in d0, G in d1 and R in
// d2, as the inline labels suggest - confirm against its definition.
22841dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
22851dee6250936424ced8722329369da75935d61580fbarchard@google.com  asm volatile (
22861dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
22871dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
22881dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
22891dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
2290c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
22911dee6250936424ced8722329369da75935d61580fbarchard@google.com  "1:                                          \n"
22920bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
22931dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
    // The flags set by subs are consumed by the bgt at the end of the loop.
22941dee6250936424ced8722329369da75935d61580fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
22951dee6250936424ced8722329369da75935d61580fbarchard@google.com    ARGB1555TOARGB
    // Widening multiply-accumulate into 16 bit lanes of q2.
22961dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
22971dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
22981dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
22991dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
23001dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
23010bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
23021dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
23031dee6250936424ced8722329369da75935d61580fbarchard@google.com    "bgt        1b                             \n"
23041dee6250936424ced8722329369da75935d61580fbarchard@google.com  : "+r"(src_argb1555),  // %0
23051dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(dst_y),         // %1
23061dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(pix)            // %2
23071dee6250936424ced8722329369da75935d61580fbarchard@google.com  :
23088f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
23091dee6250936424ced8722329369da75935d61580fbarchard@google.com  );
23101dee6250936424ced8722329369da75935d61580fbarchard@google.com}
23111dee6250936424ced8722329369da75935d61580fbarchard@google.com
// Converts a row of 16-bit ARGB4444 pixels to 8-bit Y.
// Each loop pass loads 16 bytes (8 pixels) from src_argb4444 and stores 8
// bytes to dst_y, computing per pixel
//   Y = sat8(round((13 * B + 65 * G + 33 * R) >> 7)) + 16   (saturating add).
// The count is tested after the body (subs/bgt), so a full 8 pixel pass
// always runs at least once and the last pass is never shortened when pix is
// not a multiple of 8.
// ARGB4444TOARGB and MEMACCESS are macros defined outside this section.
// NOTE(review): ARGB4444TOARGB presumably leaves B in d0, G in d1 and R in
// d2, as the inline labels suggest - confirm against its definition.
23121dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
23131dee6250936424ced8722329369da75935d61580fbarchard@google.com  asm volatile (
23141dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
23151dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
23161dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
23171dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
2318c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
23191dee6250936424ced8722329369da75935d61580fbarchard@google.com  "1:                                          \n"
23200bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
23211dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
    // The flags set by subs are consumed by the bgt at the end of the loop.
23221dee6250936424ced8722329369da75935d61580fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
23231dee6250936424ced8722329369da75935d61580fbarchard@google.com    ARGB4444TOARGB
    // Widening multiply-accumulate into 16 bit lanes of q2.
23241dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
23251dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
23261dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
23271dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
23281dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
23290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
23301dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
23311dee6250936424ced8722329369da75935d61580fbarchard@google.com    "bgt        1b                             \n"
23321dee6250936424ced8722329369da75935d61580fbarchard@google.com  : "+r"(src_argb4444),  // %0
23331dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(dst_y),         // %1
23341dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(pix)            // %2
23351dee6250936424ced8722329369da75935d61580fbarchard@google.com  :
23368f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
23371dee6250936424ced8722329369da75935d61580fbarchard@google.com  );
23381dee6250936424ced8722329369da75935d61580fbarchard@google.com}
2339bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 32-bit BGRA pixels to 8-bit Y.
// Each loop pass loads 32 bytes (8 pixels) from src_bgra, de-interleaved by
// vld4.8 so that byte 0 of every pixel lands in d0, byte 1 in d1, byte 2 in d2
// and byte 3 in d3, and stores 8 bytes to dst_y.  Bytes 1, 2 and 3 are
// weighted as R, G and B; byte 0 (d0) does not take part in the sum:
//   Y = sat8(round((33 * R + 65 * G + 13 * B) >> 7)) + 16   (saturating add).
// The count is tested after the body (subs/bgt), so a full 8 pixel pass
// always runs at least once and the last pass is never shortened when pix is
// not a multiple of 8.
// MEMACCESS is a macro defined outside this section.
2340bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
2341bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2342bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
2343bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2344bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
2345bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2346c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2347bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
23480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2349bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of BGRA.
    // The flags set by subs are consumed by the bgt at the end of the loop.
2350bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    // Widening multiply-accumulate into 16 bit lanes of q8.
2351bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d1, d4                     \n"  // R
2352bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d5                     \n"  // G
2353bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d3, d6                     \n"  // B
2354bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
2355bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
23560bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2357bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2358bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2359bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_bgra),  // %0
2360bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),     // %1
2361bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2362bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
23638f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2364bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2365bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2366bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 32-bit ABGR pixels to 8-bit Y.
// Each loop pass loads 32 bytes (8 pixels) from src_abgr, de-interleaved by
// vld4.8 so that byte 0 of every pixel lands in d0, byte 1 in d1, byte 2 in d2
// and byte 3 in d3, and stores 8 bytes to dst_y.  Bytes 0, 1 and 2 are
// weighted as R, G and B; byte 3 (d3) does not take part in the sum:
//   Y = sat8(round((33 * R + 65 * G + 13 * B) >> 7)) + 16   (saturating add).
// The count is tested after the body (subs/bgt), so a full 8 pixel pass
// always runs at least once and the last pass is never shortened when pix is
// not a multiple of 8.
// MEMACCESS is a macro defined outside this section.
2367bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
2368bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2369bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
2370bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2371bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
2372bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2373c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2374bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
23750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2376bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ABGR.
    // The flags set by subs are consumed by the bgt at the end of the loop.
2377bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    // Widening multiply-accumulate into 16 bit lanes of q8.
2378bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d0, d4                     \n"  // R
2379bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d1, d5                     \n"  // G
2380bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d6                     \n"  // B
2381bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
2382bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
23830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2384bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2385bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2386bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_abgr),  // %0
2387bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),  // %1
2388bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2389bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
23908f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2391bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2392bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2393bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of RGBA pixels to 8-bit luma, 8 pixels per loop pass.
//   src_rgba: source row, 4 bytes per pixel; advanced 32 bytes per pass.
//   dst_y:    destination row, 1 byte per pixel; advanced 8 bytes per pass.
//   pix:      pixel count. It is tested only after each pass, so the loop
//             always runs at least once and works in whole groups of 8.
// Per pixel: Y = sat8((13 * B + 65 * G + 33 * R + 64) >> 7) + 16, where the
// final add saturates as well. Byte 0 of each pixel (d0) is not used.
2394bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
2395bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
23960908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
23970908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
23980908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
23990908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2400c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
24010908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  "1:                                          \n"
24020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2403bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of RGBA, de-interleaved: d0..d3 = bytes 0..3.
2404bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2405bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d1, d4                     \n"  // B (byte 1) * 13, widened to 16 bit
2406bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d5                     \n"  // + G (byte 2) * 65
2407bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d3, d6                     \n"  // + R (byte 3) * 33
2408bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y: rounding >> 7, saturated
2409bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"  // Y += 16, saturating
24100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2411bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2412bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"  // flags come from the subs above
2413bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgba),  // %0
2414bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),  // %1
2415bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2416bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
24178f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2418bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2419bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2420bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 3-byte RGB24 pixels to 8-bit luma, 8 pixels per loop pass.
//   src_rgb24: source row, 3 bytes per pixel; advanced 24 bytes per pass.
//   dst_y:     destination row, 1 byte per pixel; advanced 8 bytes per pass.
//   pix:       pixel count. It is tested only after each pass, so the loop
//              always runs at least once and works in whole groups of 8.
// Byte 0 is weighted as B (13), byte 1 as G (65) and byte 2 as R (33):
//   Y = sat8((13 * B + 65 * G + 33 * R + 64) >> 7) + 16 (saturating add).
2421bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
2422bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2423bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
2424bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2425bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
2426bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2427c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2428bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
24290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2430bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RGB24, de-interleaved: d0..d2 = bytes 0..2.
24310908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
24320908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmull.u8   q8, d0, d4                     \n"  // B (byte 0) * 13, widened to 16 bit
24330908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmlal.u8   q8, d1, d5                     \n"  // + G (byte 1) * 65
24340908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmlal.u8   q8, d2, d6                     \n"  // + R (byte 2) * 33
24350908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y: rounding >> 7, saturated
24360908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vqadd.u8   d0, d7                         \n"  // Y += 16, saturating
24370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
24380908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
24390908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "bgt        1b                             \n"  // flags come from the subs above
2440bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgb24),  // %0
24410908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "+r"(dst_y),  // %1
24420908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "+r"(pix)        // %2
24430908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  :
  // Note: d3 is listed as clobbered although this block never writes it.
24448f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
24450908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  );
24460908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com}
2447bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 3-byte RAW pixels to 8-bit luma, 8 pixels per loop pass.
//   src_raw: source row, 3 bytes per pixel; advanced 24 bytes per pass.
//   dst_y:   destination row, 1 byte per pixel; advanced 8 bytes per pass.
//   pix:     pixel count. It is tested only after each pass, so the loop
//            always runs at least once and works in whole groups of 8.
// Byte 0 is weighted as R (33), byte 1 as G (65) and byte 2 as B (13):
//   Y = sat8((33 * R + 65 * G + 13 * B + 64) >> 7) + 16 (saturating add).
2448bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
2449bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2450bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
2451bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2452bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
2453bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2454c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2455bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
24560bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2457bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RAW, de-interleaved: d0..d2 = bytes 0..2.
2458bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2459bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d0, d4                     \n"  // R (byte 0) * 33, widened to 16 bit
2460bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d1, d5                     \n"  // + G (byte 1) * 65
2461bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d6                     \n"  // + B (byte 2) * 13
2462bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y: rounding >> 7, saturated
2463bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"  // Y += 16, saturating
24640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2465bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2466bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"  // flags come from the subs above
2467bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_raw),  // %0
2468bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),  // %1
2469bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2470bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
  // Note: d3 is listed as clobbered although this block never writes it.
24718f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2472bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2473bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
24740908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com
2475b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1
// Blends the row at src_ptr (row0) with the row at src_ptr + src_stride
// (row1) into dst_ptr, 16 bytes per loop pass.
//   dst_ptr:           destination row; advanced 16 bytes per pass.
//   src_ptr:           first source row (row0).
//   src_stride:        byte offset from row0 to row1; not used when
//                      source_y_fraction is 0.
//   dst_width:         byte count. It is tested only after each pass, so every
//                      loop runs at least once and works in whole groups of 16.
//   source_y_fraction: weight of row1 out of 256. The values 0, 64, 128 and
//                      192 take dedicated fast paths; anything else uses the
//                      general multiply path:
//                        dst = (row0 * (256 - f) + row1 * f + 128) >> 8.
//                      vdup.8 keeps only the low 8 bits of each weight, so the
//                      general path presumably expects f in 1..255 - confirm
//                      against callers.
2476b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_NEON(uint8* dst_ptr,
2477b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com                         const uint8* src_ptr, ptrdiff_t src_stride,
2478b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com                         int dst_width, int source_y_fraction) {
2479b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  asm volatile (
24808f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #0                         \n"
24818f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        100f                           \n"  // fraction 0: copy row0 only
24828f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "add        %2, %1                         \n"  // %2 = src_ptr + src_stride (row1 pointer)
24838f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #64                        \n"
24848f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        75f                            \n"  // fraction 64: 75% row0 / 25% row1
24858f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #128                       \n"
24868f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        50f                            \n"  // fraction 128: 50% / 50%
24878f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #192                       \n"
24888f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        25f                            \n"  // fraction 192: 25% row0 / 75% row1
24898f506332af217882648eed166a257557855b9fdbfbarchard@google.com
24908f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vdup.8     d5, %4                         \n"  // d5 = fraction (row1 weight, low 8 bits)
24918f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "rsb        %4, #256                       \n"  // %4 = 256 - fraction
24928f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vdup.8     d4, %4                         \n"  // d4 = 256 - fraction (row0 weight, low 8 bits)
2493b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // General purpose row blend.
2494b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "1:                                          \n"
24950bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
24962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // q0 = 16 bytes of row0
24970bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
24982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%2]!                    \n"  // q1 = 16 bytes of row1
2499b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
25008f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmull.u8   q13, d0, d4                    \n"  // row0 low half * (256 - f)
25018f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmull.u8   q14, d1, d4                    \n"  // row0 high half * (256 - f)
25028f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmlal.u8   q13, d2, d5                    \n"  // + row1 low half * f
25038f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmlal.u8   q14, d3, d5                    \n"  // + row1 high half * f
25048f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrshrn.u16 d0, q13, #8                    \n"  // rounding >> 8, narrow to 8 bit
25058f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrshrn.u16 d1, q14, #8                    \n"
25060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
25072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
25088f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        1b                             \n"
25098f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2510b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2511b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 25 / 75.
2512b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "25:                                         \n"
25130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
25142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // q0 = row0
25150bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
25162c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%2]!                    \n"  // q1 = row1
2517b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
25188f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // q0 = rounded average of row0 and row1
25198f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // average again with row1 -> row1 dominates
25200bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
25212c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
25228f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        25b                            \n"
25238f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2524b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2525b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 50 / 50.
2526b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "50:                                         \n"
25270bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
25282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // q0 = row0
25290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
25302c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%2]!                    \n"  // q1 = row1
2531b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
25328f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // q0 = rounded average of row0 and row1
25330bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
25342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
25358f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        50b                            \n"
25368f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2537b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2538b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 75 / 25.
2539b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "75:                                         \n"
25400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
25412c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // q1 = row0 (registers swapped vs. the 25 / 75 path)
25420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
25432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%2]!                    \n"  // q0 = row1
2544b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
25458f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // q0 = rounded average of row1 and row0
25468f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // average again with row0 -> row0 dominates
25470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
25482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
25498f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        75b                            \n"
25508f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2551b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2552b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 100 / 0 - Copy row unchanged.
2553b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "100:                                        \n"
25540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
25552c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"
2556b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
25570bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
25582c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
25598f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        100b                           \n"
2560b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
    // Common exit for every path.
2561b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "99:                                         \n"
2562b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  : "+r"(dst_ptr),          // %0
2563b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(src_ptr),          // %1
2564b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(src_stride),       // %2
2565b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(dst_width),        // %3
2566b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(source_y_fraction) // %4
2567b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  :
25688f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
2569b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  );
2570b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com}
25718f506332af217882648eed166a257557855b9fdbfbarchard@google.com
25728f506332af217882648eed166a257557855b9fdbfbarchard@google.com// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
// Blends a row of src_argb0 over src_argb1 and writes the result to dst_argb.
// In the formula above, s* comes from src_argb0 (including the alpha sa) and
// d* from src_argb1. Every step uses saturating 8-bit arithmetic, and the
// output alpha byte is always written as 255.
//   src_argb0, src_argb1, dst_argb: 4 bytes per pixel; all advance together.
//   width: pixel count. Full groups of 8 go through the vector loop at "8:";
//          the remaining 0..7 pixels go one at a time through the loop at
//          "1:". A width below 1 skips both loops.
25738f506332af217882648eed166a257557855b9fdbfbarchard@google.comvoid ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
25748f506332af217882648eed166a257557855b9fdbfbarchard@google.com                       uint8* dst_argb, int width) {
25758f506332af217882648eed166a257557855b9fdbfbarchard@google.com  asm volatile (
25768f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "subs       %3, #8                         \n"  // bias width by -8 so the sign says "8 more available"
25778f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "blt        89f                            \n"  // fewer than 8 pixels: go to the tail
25788f506332af217882648eed166a257557855b9fdbfbarchard@google.com    // Blend 8 pixels.
25798f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "8:                                          \n"
25800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
25818f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB0.
25820bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
25838f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 pixels of ARGB1.
25848f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
2585d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q10, d4, d3                    \n"  // db * a
2586d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q11, d5, d3                    \n"  // dg * a
2587d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q12, d6, d3                    \n"  // dr * a
2588d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
2589d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
2590d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
2591d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256 (q2 = d4:d5, q10 = d20:d21)
2592d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
2593d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   q0, q0, q2                     \n"  // + sbg (q0 = d0:d1)
2594d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   d2, d2, d6                     \n"  // + sr
25958f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmov.u8    d3, #255                       \n"  // a = 255
25960bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
25978f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 pixels of ARGB.
25988f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bge        8b                             \n"  // repeat while at least 8 pixels remain
25998f506332af217882648eed166a257557855b9fdbfbarchard@google.com
26008f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "89:                                         \n"
26018f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "adds       %3, #8-1                       \n"  // undo the -8 bias, leaving (remaining pixels - 1)
26028f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "blt        99f                            \n"  // nothing left over
26038f506332af217882648eed166a257557855b9fdbfbarchard@google.com
26048f506332af217882648eed166a257557855b9fdbfbarchard@google.com    // Blend 1 pixels.
    // Only lane 0 of each register is loaded and stored; the arithmetic still
    // runs on whole registers, but the other lanes are never written out.
26058f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "1:                                          \n"
26060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
26078f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n"  // load 1 pixel ARGB0.
26080bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
26098f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n"  // load 1 pixel ARGB1.
26108f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "subs       %3, %3, #1                     \n"  // 1 processed per loop.
2611d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q10, d4, d3                    \n"  // db * a
2612d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q11, d5, d3                    \n"  // dg * a
2613d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q12, d6, d3                    \n"  // dr * a
2614d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
2615d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
2616d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
2617d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
2618d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
2619d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
2620d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   d2, d2, d6                     \n"  // + sr
26218f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmov.u8    d3, #255                       \n"  // a = 255
26220bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
26238f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vst4.8     {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n"  // store 1 pixel.
26248f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bge        1b                             \n"  // repeat while leftover pixels remain
26258f506332af217882648eed166a257557855b9fdbfbarchard@google.com
26268f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "99:                                         \n"
26278f506332af217882648eed166a257557855b9fdbfbarchard@google.com
26288f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "+r"(src_argb0),    // %0
26298f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "+r"(src_argb1),    // %1
26308f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "+r"(dst_argb),     // %2
26318f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "+r"(width)         // %3
26328f506332af217882648eed166a257557855b9fdbfbarchard@google.com  :
2633d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
26348f506332af217882648eed166a257557855b9fdbfbarchard@google.com  );
26358f506332af217882648eed166a257557855b9fdbfbarchard@google.com}
26368f506332af217882648eed166a257557855b9fdbfbarchard@google.com
26371d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com// Attenuate 8 pixels at a time.
// Multiplies the first three bytes of every pixel (labelled b, g, r below) by
// that pixel's fourth byte (a): c = sat8((c * a + 128) >> 8). The alpha byte
// is stored back unchanged.
//   src_argb: source row, 4 bytes per pixel; advanced 32 bytes per pass.
//   dst_argb: destination row, 4 bytes per pixel; advanced 32 bytes per pass.
//   width:    pixel count. It is tested only after each pass, so the loop
//             always runs at least once and works in whole groups of 8.
26381d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.comvoid ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
26391d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  asm volatile (
26401d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    // Attenuate 8 pixels.
26411d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  "1:                                          \n"
26420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
26431d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB.
26441d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
26451d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vmull.u8   q10, d0, d3                    \n"  // b * a
26461d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vmull.u8   q11, d1, d3                    \n"  // g * a
26471d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vmull.u8   q12, d2, d3                    \n"  // r * a
26481d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vqrshrn.u16 d0, q10, #8                   \n"  // b >>= 8 (rounding, saturating narrow)
26491d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vqrshrn.u16 d1, q11, #8                   \n"  // g >>= 8
26501d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vqrshrn.u16 d2, q12, #8                   \n"  // r >>= 8
26510bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
26521d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB; d3 (alpha) is as loaded.
26531d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "bgt        1b                             \n"  // flags come from the subs above
26541d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  : "+r"(src_argb),   // %0
26551d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "+r"(dst_argb),   // %1
26561d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "+r"(width)       // %2
26571d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  :
26581d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
26591d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  );
26601d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com}
26611d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com
2662ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// Quantize 8 ARGB pixels (32 bytes).
2663ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// dst = (dst * scale >> 16) * interval_size + interval_offset;
2664ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.comvoid ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
2665ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com                          int interval_offset, int width) {
2666ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  asm volatile (
2667ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vdup.u16   q8, %2                         \n"
2668ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vshr.u16   q8, q8, #1                     \n"  // scale >>= 1
2669ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vdup.u16   q9, %3                         \n"  // interval multiply.
2670ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vdup.u16   q10, %4                        \n"  // interval add
2671ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com
2672ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    // 8 pixel loop.
2673ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    ".p2align   2                              \n"
2674ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  "1:                                          \n"
26750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2676ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]         \n"  // load 8 pixels of ARGB.
2677ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
2678ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmovl.u8   q0, d0                         \n"  // b (0 .. 255)
2679ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmovl.u8   q1, d2                         \n"
2680ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmovl.u8   q2, d4                         \n"
2681ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqdmulh.s16 q0, q0, q8                    \n"  // b * scale
2682ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqdmulh.s16 q1, q1, q8                    \n"  // g
2683ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqdmulh.s16 q2, q2, q8                    \n"  // r
2684ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmul.u16   q0, q0, q9                     \n"  // b * interval_size
2685ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmul.u16   q1, q1, q9                     \n"  // g
2686ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmul.u16   q2, q2, q9                     \n"  // r
2687ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vadd.u16   q0, q0, q10                    \n"  // b + interval_offset
2688ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vadd.u16   q1, q1, q10                    \n"  // g
2689ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vadd.u16   q2, q2, q10                    \n"  // r
2690ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqmovn.u16 d0, q0                         \n"
2691ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqmovn.u16 d2, q1                         \n"
2692ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqmovn.u16 d4, q2                         \n"
26930bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2694ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vst4.8     {d0, d2, d4, d6}, [%0]!        \n"  // store 8 pixels of ARGB.
2695ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "bgt        1b                             \n"
2696ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  : "+r"(dst_argb),       // %0
2697ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "+r"(width)           // %1
2698ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  : "r"(scale),           // %2
2699ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "r"(interval_size),   // %3
2700ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "r"(interval_offset)  // %4
2701b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
2702b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  );
2703b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com}
2704b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com
2705b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// Shade 8 pixels at a time by specified value.
2706b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from d0 to d7.
2707fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
//
// |value| supplies one 8 bit scale per channel.  Each scale byte is duplicated
// into both halves of a 16 bit lane (vzip.u8 of d0 with its copy in d1) and
// then halved so the top bit is clear when it is used as an s16 multiplier;
// the doubling built into vqrdmulh undoes the halving.
//   src_argb  source row, 32 bytes read per loop iteration.
//   dst_argb  destination row, 32 bytes written per loop iteration.
//   width     pixel count; decremented by 8 per iteration and the loop
//             repeats while the result is > 0, so the body always runs at
//             least once and always handles whole groups of 8 pixels.
//   value     the four per-channel scales packed in one 32 bit word.
2708b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.comvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
2709b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com                       uint32 value) {
2710b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  asm volatile (
2711b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vdup.u32   q0, %3                         \n"  // duplicate scale value.
2712fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vzip.u8    d0, d1                         \n"  // d0 aarrggbb.
2713fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vshr.u16   q0, q0, #1                     \n"  // scale / 2.
2714b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com
2715b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    // 8 pixel loop.
2716b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    ".p2align   2                              \n"
2717b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  "1:                                          \n"
27180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2719b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vld4.8     {d20, d22, d24, d26}, [%0]!    \n"  // load 8 pixels of ARGB.
2720b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2721b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q10, d20                       \n"  // b (0 .. 255)
2722b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q11, d22                       \n"  // g
2723b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q12, d24                       \n"  // r
2724b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q13, d26                       \n"  // a
2725578c88a9f7114b3ede887b3c6d9a11d8d06b043bfbarchard@google.com    "vqrdmulh.s16 q10, q10, d0[0]              \n"  // b * scale * 2
2726fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vqrdmulh.s16 q11, q11, d0[1]              \n"  // g
2727fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vqrdmulh.s16 q12, q12, d0[2]              \n"  // r
2728fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vqrdmulh.s16 q13, q13, d0[3]              \n"  // a
2729b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d20, q10                       \n"  // 16 bit to 8 bit b
2730b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d22, q11                       \n"  // g
2731b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d24, q12                       \n"  // r
2732b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d26, q13                       \n"  // a
27330bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2734b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vst4.8     {d20, d22, d24, d26}, [%1]!    \n"  // store 8 pixels of ARGB.
2735b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "bgt        1b                             \n"
2736b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "+r"(src_argb),       // %0
2737b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "+r"(dst_argb),       // %1
2738b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "+r"(width)           // %2
2739b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "r"(value)            // %3
2740b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
2741ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  );
2742ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com}
2743ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com
274482375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
2745050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// Similar to ARGBToYJ but stores ARGB.
2746050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
//
// The gray value is written to the B, G and R channels; the alpha channel
// loaded into d3 is stored back unmodified.
//   src_argb  source row, 32 bytes read per loop iteration.
//   dst_argb  destination row, 32 bytes written per loop iteration.
//   width     pixel count; decremented by 8 per iteration and the loop
//             repeats while the result is > 0, so the body always runs at
//             least once and always handles whole groups of 8 pixels.
274782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.comvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
274882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  asm volatile (
2749050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
2750050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
2751050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
2752c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
275382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  "1:                                          \n"
27540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
275582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
275682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
275782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
275882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
275982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
    // The rounding narrow below supplies the "+ 64" term of the C formula.
2760050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit B
276182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmov       d1, d0                         \n"  // G
276282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmov       d2, d0                         \n"  // R
27630bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
276482375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 ARGB pixels.
276582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "bgt        1b                             \n"
276682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  : "+r"(src_argb),  // %0
276782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "+r"(dst_argb),  // %1
276882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "+r"(width)      // %2
276982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  :
277082375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
277182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  );
277282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com}
277382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com
2774c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
2775c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com//    b = (r * 35 + g * 68 + b * 17) >> 7
2776c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com//    g = (r * 45 + g * 88 + b * 22) >> 7
2777c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com//    r = (r * 50 + g * 98 + b * 24) >> 7
//
// Operates in place: pixels are loaded from dst_argb without advancing the
// pointer and stored back to the same address with post-increment.  The
// narrowing shift saturates to 255 and does not round.  The alpha channel
// loaded into d3 is stored back unmodified.
//   dst_argb  row to convert, 32 bytes read and rewritten per iteration.
//   width     pixel count; decremented by 8 per iteration and the loop
//             repeats while the result is > 0, so the body always runs at
//             least once and always handles whole groups of 8 pixels.
2778c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.comvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
2779c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  asm volatile (
2780c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d20, #17                       \n"  // BB coefficient
2781c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d21, #68                       \n"  // BG coefficient
2782c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d22, #35                       \n"  // BR coefficient
2783c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d24, #22                       \n"  // GB coefficient
2784c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d25, #88                       \n"  // GG coefficient
2785c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d26, #45                       \n"  // GR coefficient
2786c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d28, #24                       \n"  // RB coefficient
2787c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d29, #98                       \n"  // RG coefficient
2788c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d30, #50                       \n"  // RR coefficient
2789c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2790c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  "1:                                          \n"
27910bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2792c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]         \n"  // load 8 ARGB pixels.
2793c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
2794c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmull.u8   q2, d0, d20                    \n"  // B to Sepia B
2795c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q2, d1, d21                    \n"  // G
2796c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q2, d2, d22                    \n"  // R
2797c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmull.u8   q3, d0, d24                    \n"  // B to Sepia G
2798c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q3, d1, d25                    \n"  // G
2799c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q3, d2, d26                    \n"  // R
2800c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmull.u8   q8, d0, d28                    \n"  // B to Sepia R
2801c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q8, d1, d29                    \n"  // G
2802c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q8, d2, d30                    \n"  // R
280387adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com    "vqshrn.u16 d0, q2, #7                     \n"  // 16 bit to 8 bit B
280487adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com    "vqshrn.u16 d1, q3, #7                     \n"  // 16 bit to 8 bit G
280587adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com    "vqshrn.u16 d2, q8, #7                     \n"  // 16 bit to 8 bit R
28060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2807c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%0]!        \n"  // store 8 ARGB pixels.
2808c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "bgt        1b                             \n"
2809c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  : "+r"(dst_argb),  // %0
2810c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "+r"(width)      // %1
2811c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  :
  // q8 is the Sepia R accumulator, so it must be listed as clobbered too.
2812c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q8",
2813c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "q10", "q11", "q12", "q13", "q14", "q15"
2814c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  );
2815c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com}
2816c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com
281762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com// Transform 8 ARGB pixels (32 bytes) with color matrix.
2818c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// TODO(fbarchard): Was same as Sepia except matrix is provided.  This function
2819c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// needs to saturate.  Consider doing a non-saturating version.
//
// matrix_argb holds 16 signed 8 bit coefficients, read as 4 rows of 4:
// coefficients 0..3 produce B, 4..7 produce G, 8..11 produce R and 12..15
// produce A.  Within a row the coefficients multiply source B, G, R and A in
// that order.  Products are summed with saturating s16 adds, shifted right by
// 6 and saturated to unsigned 8 bit.
//   src_argb     source row, 32 bytes read per loop iteration.
//   dst_argb     destination row, 32 bytes written per loop iteration.
//   matrix_argb  the 16 coefficients, loaded once before the loop.
//   width        pixel count; decremented by 8 per iteration and the loop
//                repeats while the result is > 0, so the body always runs at
//                least once and always handles whole groups of 8 pixels.
2820c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.comvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
2821c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com                             const int8* matrix_argb, int width) {
282262154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com  asm volatile (
28230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
2824c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vld1.8     {q2}, [%3]                     \n"  // load 16 coefficients.
282562154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com    "vmovl.s8   q0, d4                         \n"  // B,G coefficients s16.
2826c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmovl.s8   q1, d5                         \n"  // R,A coefficients s16.
282762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com
2828c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
282962154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com  "1:                                          \n"
28300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2831c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vld4.8     {d16, d18, d20, d22}, [%0]!    \n"  // load 8 ARGB pixels.
2832c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
28330cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q8, d16                        \n"  // b (0 .. 255) 16 bit
28340cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q9, d18                        \n"  // g
28350cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q10, d20                       \n"  // r
    // Source alpha lives in q11, not q15: q15 is the A accumulator and is
    // overwritten below before the alpha products are computed.
28360cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q11, d22                       \n"  // a
28370cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q12, q8, d0[0]                 \n"  // B = B * Matrix B
28380cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q13, q8, d1[0]                 \n"  // G = B * Matrix G
28390cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q14, q8, d2[0]                 \n"  // R = B * Matrix R
2840c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q15, q8, d3[0]                 \n"  // A = B * Matrix A
28410cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q4, q9, d0[1]                  \n"  // B += G * Matrix B
28420cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q5, q9, d1[1]                  \n"  // G += G * Matrix G
28430cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q6, q9, d2[1]                  \n"  // R += G * Matrix R
2844c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q7, q9, d3[1]                  \n"  // A += G * Matrix A
28450cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
28460cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
28470cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
2848c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
28490cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q4, q10, d0[2]                 \n"  // B += R * Matrix B
28500cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q5, q10, d1[2]                 \n"  // G += R * Matrix G
28510cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q6, q10, d2[2]                 \n"  // R += R * Matrix R
2852c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q7, q10, d3[2]                 \n"  // A += R * Matrix A
28530cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
28540cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
28550cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
2856c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
28570cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q4, q11, d0[3]                 \n"  // B += A * Matrix B
28580cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q5, q11, d1[3]                 \n"  // G += A * Matrix G
28590cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q6, q11, d2[3]                 \n"  // R += A * Matrix R
2860c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q7, q11, d3[3]                 \n"  // A += A * Matrix A
28610cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
28620cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
28630cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
2864c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
2865c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d16, q12, #6                  \n"  // 16 bit to 8 bit B
2866c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d18, q13, #6                  \n"  // 16 bit to 8 bit G
2867c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d20, q14, #6                  \n"  // 16 bit to 8 bit R
2868c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d22, q15, #6                  \n"  // 16 bit to 8 bit A
28690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2870c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vst4.8     {d16, d18, d20, d22}, [%1]!    \n"  // store 8 ARGB pixels.
2871c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "bgt        1b                             \n"
2872c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com  : "+r"(src_argb),   // %0
2873c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "+r"(dst_argb),   // %1
2874c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "+r"(width)       // %2
2875c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com  : "r"(matrix_argb)  // %3
2876c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
28770cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "q10", "q11", "q12", "q13", "q14", "q15"
287862154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com  );
287962154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com}
288062154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com
2881512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
2882512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com#ifdef HAS_ARGBMULTIPLYROW_NEON
28835b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
//
// Each channel is multiplied with the matching channel of the other row into
// a 16 bit product, then narrowed with a rounding shift right by 8.
//   src_argb0, src_argb1  source rows, 32 bytes read from each per iteration.
//   dst_argb              destination row, 32 bytes written per iteration.
//   width                 pixel count; decremented by 8 per iteration and the
//                         loop repeats while the result is > 0, so the body
//                         always runs at least once and always handles whole
//                         groups of 8 pixels.
28845b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.comvoid ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
28855b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com                          uint8* dst_argb, int width) {
28865b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  asm volatile (
28875b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    // 8 pixel loop.
2888c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
28895b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  "1:                                          \n"
    // Row 0 channels go to even d registers and row 1 channels to odd ones,
    // so each d register pair (d0/d1, d2/d3, ...) holds the same channel of
    // both rows.
28900bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
28915b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
28920bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
28935b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
28945b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
28955b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q0, d0, d1                     \n"  // multiply B
28965b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q1, d2, d3                     \n"  // multiply G
28975b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q2, d4, d5                     \n"  // multiply R
28985b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q3, d6, d7                     \n"  // multiply A
    // Narrow in ascending order: each destination d register belongs to a q
    // register whose product has already been consumed.
28996a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d0, q0, #8                     \n"  // 16 bit to 8 bit B
29006a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d1, q1, #8                     \n"  // 16 bit to 8 bit G
29016a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d2, q2, #8                     \n"  // 16 bit to 8 bit R
29026a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d3, q3, #8                     \n"  // 16 bit to 8 bit A
29030bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
29045b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
29055b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "bgt        1b                             \n"
29065b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
29075b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  : "+r"(src_argb0),  // %0
29085b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(src_argb1),  // %1
29095b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(dst_argb),   // %2
29105b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(width)       // %3
29115b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  :
29125b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"
29135b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  );
29145b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com}
2915512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com#endif  // HAS_ARGBMULTIPLYROW_NEON
29165b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
29175b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com// Add 2 rows of ARGB pixels together, 8 pixels at a time.
//
// Every channel, alpha included, is added with unsigned 8 bit saturation.
//   src_argb0, src_argb1  source rows, 32 bytes read from each per iteration.
//   dst_argb              destination row, 32 bytes written per iteration.
//   width                 pixel count; decremented by 8 per iteration and the
//                         loop repeats while the result is > 0, so the body
//                         always runs at least once and always handles whole
//                         groups of 8 pixels.
29185b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.comvoid ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
29195b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com                     uint8* dst_argb, int width) {
29205b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  asm volatile (
29215b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    // 8 pixel loop.
2922c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
29235b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  "1:                                          \n"
29240bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
29255b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
29260bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
29275b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
29285b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
    // q0/q2 hold the B and G planes of each row, q1/q3 the R and A planes.
29295b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vqadd.u8   q0, q0, q2                     \n"  // add B, G
29305b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vqadd.u8   q1, q1, q3                     \n"  // add R, A
29310bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
29325b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
29335b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "bgt        1b                             \n"
29345b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
29355b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  : "+r"(src_argb0),  // %0
29365b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(src_argb1),  // %1
29375b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(dst_argb),   // %2
29385b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(width)       // %3
29395b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  :
2940573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"
2941573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  );
2942573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com}
2943573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com
2944573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
//
// Computes src_argb0 - src_argb1 for every channel, alpha included, with
// unsigned 8 bit saturation.
//   src_argb0, src_argb1  source rows, 32 bytes read from each per iteration.
//   dst_argb              destination row, 32 bytes written per iteration.
//   width                 pixel count; decremented by 8 per iteration and the
//                         loop repeats while the result is > 0, so the body
//                         always runs at least once and always handles whole
//                         groups of 8 pixels.
2945573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.comvoid ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
2946573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com                          uint8* dst_argb, int width) {
2947573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  asm volatile (
2948573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    // 8 pixel loop.
2949c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2950573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  "1:                                          \n"
29510bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2952573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
29530bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2954573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels.
2955573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
    // q0/q2 hold the B and G planes of each row, q1/q3 the R and A planes.
2956573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vqsub.u8   q0, q0, q2                     \n"  // subtract B, G
2957573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vqsub.u8   q1, q1, q3                     \n"  // subtract R, A
29580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
2959573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
2960573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "bgt        1b                             \n"
2961573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com
2962573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  : "+r"(src_argb0),  // %0
2963573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "+r"(src_argb1),  // %1
2964573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "+r"(dst_argb),   // %2
2965573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "+r"(width)       // %3
2966573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  :
2967573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"
29685b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  );
29695b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com}
29705b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
2971c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
2972c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// A = 255
2973c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// R = Sobel
2974c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// G = Sobel
2975c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// B = Sobel
2976c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
2977c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                     uint8* dst_argb, int width) {
2978c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
2979c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d3, #255                       \n"  // alpha
2980c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    // 8 pixel loop.
2981c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2982c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
29830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
2984c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"  // load 8 sobelx.
29850bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
2986c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d1}, [%1]!                    \n"  // load 8 sobely.
2987c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
2988c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqadd.u8   d0, d0, d1                     \n"  // add
2989c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d1, d0                         \n"
2990c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d2, d0                         \n"
29910bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
2992c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
2993c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"
2994c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_sobelx),  // %0
2995c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_sobely),  // %1
29968be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(dst_argb),    // %2
29978be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(width)        // %3
29988be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  :
29998be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  : "cc", "memory", "q0", "q1"
30008be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  );
30018be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com}
30028be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com
30038be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into plane.
30048be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.comvoid SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
30058be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com                          uint8* dst_y, int width) {
30068be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  asm volatile (
30078be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    // 16 pixel loop.
3008c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
30098be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  "1:                                          \n"
30100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
30118be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 16 sobelx.
30120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
30138be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // load 16 sobely.
30148be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
30158be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vqadd.u8   q0, q0, q1                     \n"  // add
30160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
30178be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"  // store 16 pixels.
30188be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "bgt        1b                             \n"
30198be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  : "+r"(src_sobelx),  // %0
30208be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(src_sobely),  // %1
30218be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(dst_y),       // %2
30228be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(width)        // %3
3023c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  :
3024c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"
3025c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
3026c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
3027c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com
3028c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// Mixes Sobel X, Sobel Y and Sobel into ARGB.
3029c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// A = 255
3030c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// R = Sobel X
3031c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// G = Sobel
3032c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// B = Sobel Y
3033c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
3034c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                     uint8* dst_argb, int width) {
3035c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
3036c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d3, #255                       \n"  // alpha
3037c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    // 8 pixel loop.
3038c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
3039c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
30400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
3041c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d2}, [%0]!                    \n"  // load 8 sobelx.
30420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
3043c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d0}, [%1]!                    \n"  // load 8 sobely.
3044c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
3045c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqadd.u8   d1, d0, d2                     \n"  // add
30460bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
3047c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels.
3048c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"
3049c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_sobelx),  // %0
3050c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_sobely),  // %1
3051ff4a84168d731c33ce7dedcfb497376a8669cecafbarchard@google.com    "+r"(dst_argb),    // %2
3052ff4a84168d731c33ce7dedcfb497376a8669cecafbarchard@google.com    "+r"(width)        // %3
3053c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  :
3054c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"
3055c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
3056c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
3057c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com
3058c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// SobelX as a matrix is
3059c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1  0  1
3060c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -2  0  2
3061c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1  0  1
3062c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
3063c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                    const uint8* src_y2, uint8* dst_sobelx, int width) {
3064c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
3065c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
3066c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
30670bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
30682c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0],%5                  \n"  // top
30690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
30702c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d1}, [%0],%6                  \n"
3071c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q0, d0, d1                     \n"
30720bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
30732c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%1],%5                  \n"  // center * 2
30740bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
30752c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%1],%6                  \n"
3076c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"
3077c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"
3078c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"
30790bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
30802c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%2],%5                  \n"  // bottom
30810bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
30822c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%2],%6                  \n"
3083c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %4, %4, #8                     \n"  // 8 pixels
3084c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"
3085c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"
3086c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vabs.s16   q0, q0                         \n"
3087c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqmovn.u16 d0, q0                         \n"
30880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(3)
30892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%3]!                    \n"  // store 8 sobelx
3090c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"
3091c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_y0),      // %0
3092c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_y1),      // %1
3093c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_y2),      // %2
3094c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(dst_sobelx),  // %3
3095c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(width)        // %4
3096c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "r"(2),            // %5
3097c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "r"(6)             // %6
3098c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
3099c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
3100c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
3101c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com
3102c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// SobelY as a matrix is
3103c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1 -2 -1
3104c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com//  0  0  0
3105c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com//  1  2  1
3106c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
3107c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                    uint8* dst_sobely, int width) {
3108c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
3109c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
3110c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
31110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
31122c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0],%4                  \n"  // left
31130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
31142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d1}, [%1],%4                  \n"
3115c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q0, d0, d1                     \n"
31160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
31172c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%0],%4                  \n"  // center * 2
31180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
31192c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%1],%4                  \n"
3120c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"
3121c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"
3122c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"
31230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(0)
31242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%0],%5                  \n"  // right
31250bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(1)
31262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%1],%5                  \n"
3127c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 pixels
3128c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"
3129c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"
3130c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vabs.s16   q0, q0                         \n"
3131c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqmovn.u16 d0, q0                         \n"
31320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com    MEMACCESS(2)
31332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 sobely
3134c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"
3135c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_y0),      // %0
3136c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_y1),      // %1
3137c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(dst_sobely),  // %2
3138c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(width)        // %3
3139c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "r"(1),            // %4
3140c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "r"(6)             // %5
3141c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
3142c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
3143c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
314419932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com#endif  // __ARM_NEON__
31452d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com
3146fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus
3147fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com}  // extern "C"
3148fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com}  // namespace libyuv
3149fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif
3150