/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
11142f6c4ed5eaeec0176f255e64bac8d8c70b42e1fbarchard@google.com#include "libyuv/row.h"
1293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
13fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus
14fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comnamespace libyuv {
15fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comextern "C" {
16fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif
17fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com
// This module is for GCC Neon
1937ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
2037ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com  !defined(__native_client__)
212d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com
// Read 8 Y, 4 U and 4 V from 422.
// Operands: %0 = Y pointer, %1 = U pointer, %2 = V pointer (all
// post-incremented by the number of bytes read).
// Result:   d0 = 8 Y bytes; d2 = 4 U bytes (32-bit lane 0) followed by
//           4 V bytes (32-bit lane 1).
#define READYUV422                                                             \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vld1.32    {d2[0]}, [%1]!                 \n"                             \
    "vld1.32    {d2[1]}, [%2]!                 \n"
274807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Read 8 Y, 2 U and 2 V from 411.
// Operands: %0 = Y pointer, %1 = U pointer, %2 = V pointer (all
// post-incremented by the number of bytes read).
// The two U bytes and two V bytes land in the low half of d2; zipping d2 with
// a copy of itself duplicates every byte, so d2 ends up as
// U0 U0 U1 U1 V0 V0 V1 V1 -- the same "4 U then 4 V" layout READYUV422
// produces. d3 is used as scratch.
#define READYUV411                                                             \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vld1.16    {d2[0]}, [%1]!                 \n"                             \
    "vld1.16    {d2[1]}, [%2]!                 \n"                             \
    "vmov.u8    d3, d2                         \n"                             \
    "vzip.u8    d2, d3                         \n"
35b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
// Read 8 Y, 8 U and 8 V from 444.
// Operands: %0 = Y pointer, %1 = U pointer, %2 = V pointer (each
// post-incremented by 8 bytes).
// U is loaded into d2 and V into d3 (together q1). Horizontally adjacent
// chroma bytes are summed pairwise into 16-bit lanes and then narrowed with a
// rounding shift right by 1, i.e. averaged, leaving d2 = 4 U bytes followed by
// 4 V bytes as YUV422TORGB expects.
#define READYUV444                                                             \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vld1.8     {d2}, [%1]!                    \n"                             \
    "vld1.8     {d3}, [%2]!                    \n"                             \
    "vpaddl.u8  q1, q1                         \n"                             \
    "vrshrn.u16 d2, q1, #1                     \n"
43b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
// Read 8 Y, and set 4 U and 4 V to 128.
// Operand: %0 = Y pointer (post-incremented by 8 bytes).
// No chroma is read; every byte of d2 is set to 128, which YUV422TORGB turns
// into a zero chroma contribution after its "subtract 128" step.
#define READYUV400                                                             \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vmov.u8    d2, #128                       \n"
4800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com
// Read 8 Y and 4 UV from NV12.
// Operands: %0 = Y pointer, %1 = interleaved UV pointer (each post-incremented
// by 8 bytes).
// d2 receives U0 V0 U1 V1 U2 V2 U3 V3. Unzipping d2 against a copy of itself
// puts the even bytes (U) in d2 and the odd bytes (V) in d3; the 32-bit
// transpose then leaves d2 = 4 U bytes followed by 4 V bytes. d3 is scratch.
#define READNV12                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vld1.8     {d2}, [%1]!                    \n"                             \
    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"
564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Read 8 Y and 4 VU from NV21.
// Operands: %0 = Y pointer, %1 = interleaved VU pointer (each post-incremented
// by 8 bytes).
// Same as READNV12 except the unzip operands are swapped (d3, d2), so the odd
// bytes (U in VU order) end up in d2 and the even bytes (V) in d3. After the
// 32-bit transpose d2 = 4 U bytes followed by 4 V bytes. d3 is scratch.
#define READNV21                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vld1.8     {d2}, [%1]!                    \n"                             \
    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
    "vuzp.u8    d3, d2                         \n"                             \
    "vtrn.u32   d2, d3                         \n"
64793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
// Read 8 pixels (16 bytes) of YUY2.
// Operand: %0 = packed YUY2 pointer (post-incremented by 16 bytes).
// The 2-way de-interleaving load puts the even bytes (Y) in d0 and the odd
// bytes (U0 V0 U1 V1 ...) in d2. The chroma bytes are then split exactly as in
// READNV12, leaving d2 = 4 U bytes followed by 4 V bytes. d3 is scratch.
#define READYUY2                                                               \
    "vld2.8     {d0, d2}, [%0]!                \n"                             \
    "vmov.u8    d3, d2                         \n"                             \
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"
71793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
// Read 8 pixels (16 bytes) of UYVY.
// Operand: %0 = packed UYVY pointer (post-incremented by 16 bytes).
// The 2-way de-interleaving load puts the even bytes (U0 V0 U1 V1 ...) in d2
// and the odd bytes (Y) in d3. Y is moved to d0 first, then the chroma bytes
// are split exactly as in READNV12, leaving d2 = 4 U bytes followed by 4 V
// bytes. d3 is scratch.
#define READUYVY                                                               \
    "vld2.8     {d2, d3}, [%0]!                \n"                             \
    "vmov.u8    d0, d3                         \n"                             \
    "vmov.u8    d3, d2                         \n"                             \
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"
794807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Convert 8 pixels from YUV to RGB. Each U/V sample is applied to one
// even-indexed and one odd-indexed pixel, i.e. to a horizontal pixel pair.
// Inputs (must be set up by the code that expands this macro):
//   d0  = 8 Y bytes
//   d2  = 4 U bytes followed by 4 V bytes
//   d24 = signed 8-bit U/V coefficients for B (U lanes) and R (V lanes)
//   d25 = signed 8-bit U/V coefficients for G
//   d26 = value XORed into U/V to remove the 128 bias
//   q14 = 16-bit Y scale, q15 = 16-bit Y offset
// Outputs: d20 = B, d21 = G, d22 = R (8 bytes each, in pixel order).
// Overwrites q0, d2, q8, q9 and d23 as scratch.
#define YUV422TORGB                                                            \
    "veor.u8    d2, d26                        \n"/*subtract 128 from u and v*/\
    "vmull.s8   q8, d2, d24                    \n"/*  u/v B/R component      */\
    "vmull.s8   q9, d2, d25                    \n"/*  u/v G component        */\
    "vmov.u8    d1, #0                         \n"/*  split odd/even y apart */\
    "vtrn.u8    d0, d1                         \n"                             \
    "vsub.s16   q0, q0, q15                    \n"/*  offset y               */\
    "vmul.s16   q0, q0, q14                    \n"/*  scale y                */\
    "vadd.s16   d18, d19                       \n"/*  sum u and v G terms    */\
    "vqadd.s16  d20, d0, d16                   \n" /* B */                     \
    "vqadd.s16  d21, d1, d16                   \n"                             \
    "vqadd.s16  d22, d0, d17                   \n" /* R */                     \
    "vqadd.s16  d23, d1, d17                   \n"                             \
    "vqadd.s16  d16, d0, d18                   \n" /* G */                     \
    "vqadd.s16  d17, d1, d18                   \n"                             \
    "vqshrun.s16 d0, q10, #6                   \n" /* B */                     \
    "vqshrun.s16 d1, q11, #6                   \n" /* R */                     \
    "vqshrun.s16 d2, q8, #6                    \n" /* G */                     \
    "vmovl.u8   q10, d0                        \n"/*  set up for reinterleave*/\
    "vmovl.u8   q11, d1                        \n"                             \
    "vmovl.u8   q8, d2                         \n"                             \
    "vtrn.u8    d20, d21                       \n"                             \
    "vtrn.u8    d22, d23                       \n"                             \
    "vtrn.u8    d16, d17                       \n"                             \
    "vmov.u8    d21, d16                       \n"/*  place G next to B and R*/
10593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
106f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToRB  = { 127, 127, 127, 127, 102, 102, 102, 102,
107f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com                         0, 0, 0, 0, 0, 0, 0, 0 };
108f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
109f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com                       0, 0, 0, 0, 0, 0, 0, 0 };
11093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
111b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I444ToARGBRow_NEON(const uint8* src_y,
112b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_u,
113b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_v,
114b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        uint8* dst_argb,
115b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        int width) {
116b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  asm volatile (
1172c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
1182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
119b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d26, #128                      \n"
120b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q14, #74                       \n"
121b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q15, #16                       \n"
122c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
123b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  "1:                                          \n"
124b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    READYUV444
125b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    YUV422TORGB
126b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "subs       %4, %4, #8                     \n"
127b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d23, #255                      \n"
128b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
129b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "bgt        1b                             \n"
130b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "+r"(src_y),     // %0
131b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_u),     // %1
132b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_v),     // %2
133b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(dst_argb),  // %3
134b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(width)      // %4
135b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "r"(&kUVToRB),   // %5
136b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "r"(&kUVToG)     // %6
137b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
138b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
139b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  );
140b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com}
141b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
1429de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToARGBRow_NEON(const uint8* src_y,
1439de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
1449de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
1459de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_argb,
146e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com                        int width) {
1475b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
1482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
1492c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
15093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d26, #128                      \n"
15193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q14, #74                       \n"
15293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q15, #16                       \n"
153c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
15493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  "1:                                          \n"
1554807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
156e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com    YUV422TORGB
1574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
15893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d23, #255                      \n"
159dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
16018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
1619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
1629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
1639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
1649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_argb),  // %3
1659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
1669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
1679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
16864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
16964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
17093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  );
17193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com}
17293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
173b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I411ToARGBRow_NEON(const uint8* src_y,
174b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_u,
175b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        const uint8* src_v,
176b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        uint8* dst_argb,
177b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com                        int width) {
178b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  asm volatile (
1792c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
1802c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
181b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d26, #128                      \n"
182b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q14, #74                       \n"
183b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u16   q15, #16                       \n"
184c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
185b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  "1:                                          \n"
186b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    READYUV411
187b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    YUV422TORGB
188b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "subs       %4, %4, #8                     \n"
189b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vmov.u8    d23, #255                      \n"
190b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
191b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    "bgt        1b                             \n"
192b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "+r"(src_y),     // %0
193b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_u),     // %1
194b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(src_v),     // %2
195b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(dst_argb),  // %3
196b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "+r"(width)      // %4
197b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "r"(&kUVToRB),   // %5
198b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "r"(&kUVToG)     // %6
199b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
200b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
201b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com  );
202b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com}
203b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com
2049de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToBGRARow_NEON(const uint8* src_y,
2059de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
2069de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
2079de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_bgra,
208e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com                        int width) {
2095b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
2102c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
2112c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
21293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d26, #128                      \n"
21393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q14, #74                       \n"
21493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q15, #16                       \n"
215c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
21693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  "1:                                          \n"
2174807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
218e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com    YUV422TORGB
2194807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
22093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vswp.u8    d20, d22                       \n"
22193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d19, #255                      \n"
222dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
22318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
2249de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
2259de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
2269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
2279de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_bgra),  // %3
2289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
2299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
2309de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
23164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
23264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
23393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  );
23493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com}
23593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com
2369de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToABGRRow_NEON(const uint8* src_y,
2379de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
2389de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
2399de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_abgr,
240e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com                        int width) {
2415b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
2422c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
2432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
24493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d26, #128                      \n"
24593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q14, #74                       \n"
24693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u16   q15, #16                       \n"
247c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
24893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  "1:                                          \n"
2494807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
250e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com    YUV422TORGB
2514807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
25293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vswp.u8    d20, d22                       \n"
25393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com    "vmov.u8    d23, #255                      \n"
254dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
25518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
2569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
2579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
2589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
2599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_abgr),  // %3
2609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
2619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
2629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
26364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
26464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
26593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com  );
26693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com}
267fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com
2689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGBARow_NEON(const uint8* src_y,
2699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
2709de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
2719de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_rgba,
2722d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com                        int width) {
2732d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com  asm volatile (
2742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
2752c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
2762d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u8    d26, #128                      \n"
2772d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u16   q14, #74                       \n"
2782d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u16   q15, #16                       \n"
279c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2802d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com  "1:                                          \n"
2814807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READYUV422
2822d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    YUV422TORGB
2834807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #8                     \n"
2842d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "vmov.u8    d19, #255                      \n"
285dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
2862d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com    "bgt        1b                             \n"
2879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
2889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),     // %1
2899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),     // %2
2909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_rgba),  // %3
2919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %4
2929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %5
2939de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %6
29464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
29564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
2962d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com  );
2972d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com}
2982d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com
2999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGB24Row_NEON(const uint8* src_y,
300834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         const uint8* src_u,
301834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         const uint8* src_v,
302834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         uint8* dst_rgb24,
303834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                         int width) {
30464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
3052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
3062c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
30764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u8    d26, #128                      \n"
30864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q14, #74                       \n"
30964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q15, #16                       \n"
310c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
31164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  "1:                                          \n"
31264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    READYUV422
31364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    YUV422TORGB
31464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "subs       %4, %4, #8                     \n"
31564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vst3.8     {d20, d21, d22}, [%3]!         \n"
31664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
3179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),      // %0
3189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),      // %1
3199de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),      // %2
3209de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_rgb24),  // %3
3219de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)       // %4
3229de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),    // %5
3239de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)      // %6
32464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
32564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
32664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
3299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRAWRow_NEON(const uint8* src_y,
3309de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                       const uint8* src_u,
3319de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                       const uint8* src_v,
3329de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                       uint8* dst_raw,
33364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                       int width) {
33464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
3352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
3362c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u8    d26, #128                      \n"
33864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q14, #74                       \n"
33964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u16   q15, #16                       \n"
340c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
34164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  "1:                                          \n"
34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    READYUV422
34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    YUV422TORGB
34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "subs       %4, %4, #8                     \n"
34564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vswp.u8    d20, d22                       \n"
34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vst3.8     {d20, d21, d22}, [%3]!         \n"
34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
3489de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),    // %0
3499de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_u),    // %1
3509de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_v),    // %2
3519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_raw),  // %3
35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "+r"(width)     // %4
35364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "r"(&kUVToRB),  // %5
35464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "r"(&kUVToG)    // %6
35564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
35664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
35764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
35864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
35964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
// Packs 8 pixels into RGB565.
// In:  d20 = B, d21 = G, d22 = R (8 bytes each).
// Out: q0 = 8 x 16-bit pixels, (R >> 3) << 11 | (G >> 2) << 5 | (B >> 3).
// Overwrites q8, q9 and q10 (and therefore d20/d21).
#define ARGBTORGB565                                                           \
    "vshr.u8    d20, d20, #3                   \n"  /* B                    */ \
    "vshr.u8    d21, d21, #2                   \n"  /* G                    */ \
    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
    "vshl.u16   q10, q10, #11                  \n"  /* R                    */ \
    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
    "vorr       q0, q0, q10                    \n"  /* BGR                  */

37215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.comvoid I422ToRGB565Row_NEON(const uint8* src_y,
373834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          const uint8* src_u,
374834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          const uint8* src_v,
375834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          uint8* dst_rgb565,
376834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                          int width) {
37715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com  asm volatile (
3782c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
3792c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
38015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vmov.u8    d26, #128                      \n"
38115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vmov.u16   q14, #74                       \n"
38215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vmov.u16   q15, #16                       \n"
383c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
38415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com  "1:                                          \n"
38515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    READYUV422
38615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    YUV422TORGB
38715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "subs       %4, %4, #8                     \n"
38811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTORGB565
38915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels RGB565.
39015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    "bgt        1b                             \n"
39115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    : "+r"(src_y),    // %0
39215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(src_u),    // %1
39315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(src_v),    // %2
39415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(dst_rgb565),  // %3
39515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "+r"(width)     // %4
39615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    : "r"(&kUVToRB),  // %5
39715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "r"(&kUVToG)    // %6
39815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
39915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
40015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com  );
40115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com}
40215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com
// Packs 8 pixels into ARGB1555.
// In:  d20 = B, d21 = G, d22 = R, d23 = A (8 bytes each).
// Out: q0 = 8 x 16-bit pixels,
//      (A >> 7) << 15 | (R >> 3) << 10 | (G >> 3) << 5 | (B >> 3).
// The first shift works on q10, i.e. on d20 (B) and d21 (G) together.
// Overwrites q1, q8, q9, q10 and q11.
#define ARGBTOARGB1555                                                         \
    "vshr.u8    q10, q10, #3                   \n"  /* B, G                 */ \
    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
    "vshr.u8    d23, d23, #7                   \n"  /* A                    */ \
    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
    "vmovl.u8   q11, d23                       \n"  /* A                    */ \
    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
    "vshl.u16   q10, q10, #10                  \n"  /* R                    */ \
    "vshl.u16   q11, q11, #15                  \n"  /* A                    */ \
    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
    "vorr       q1, q10, q11                   \n"  /* RA                   */ \
    "vorr       q0, q0, q1                     \n"  /* BGRA                 */

41811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB1555Row_NEON(const uint8* src_y,
419834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_u,
420834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_v,
421834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            uint8* dst_argb1555,
422834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            int width) {
42311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  asm volatile (
4242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
4252c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
42611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d26, #128                      \n"
42711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q14, #74                       \n"
42811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q15, #16                       \n"
429c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
43011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  "1:                                          \n"
43111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    READYUV422
43211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    YUV422TORGB
43311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "subs       %4, %4, #8                     \n"
43411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d23, #255                      \n"
43511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB1555
43611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB1555.
43711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "bgt        1b                             \n"
43811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "+r"(src_y),    // %0
43911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_u),    // %1
44011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_v),    // %2
44111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(dst_argb1555),  // %3
44211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(width)     // %4
44311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "r"(&kUVToRB),  // %5
44411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "r"(&kUVToG)    // %6
44511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
44611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
44711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  );
44811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com}
44911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
// Packs 8 pixels into ARGB4444.
// In:  d20 = B, d21 = G, d22 = R, d23 = A (8 bytes each);
//      d4 = mask of the bits vbic must clear (callers load 0x0f).
// Out: q0 = 16 bytes; for each pixel a (B >> 4) | (G & ~d4) byte followed by
//      a (R >> 4) | (A & ~d4) byte, interleaved by vzip.
// Modifies d20..d23 in place.
#define ARGBTOARGB4444                                                         \
    "vshr.u8    d20, d20, #4                   \n"  /* B                    */ \
    "vbic.32    d21, d21, d4                   \n"  /* G                    */ \
    "vshr.u8    d22, d22, #4                   \n"  /* R                    */ \
    "vbic.32    d23, d23, d4                   \n"  /* A                    */ \
    "vorr       d0, d20, d21                   \n"  /* BG                   */ \
    "vorr       d1, d22, d23                   \n"  /* RA                   */ \
    "vzip.u8    d0, d1                         \n"  /* BGRA                 */

45911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB4444Row_NEON(const uint8* src_y,
460834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_u,
461834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            const uint8* src_v,
462834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            uint8* dst_argb4444,
463834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com                            int width) {
46411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  asm volatile (
4652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%5]                    \n"
4662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%6]                    \n"
46711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d26, #128                      \n"
46811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q14, #74                       \n"
46911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u16   q15, #16                       \n"
47011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
471c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
47211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  "1:                                          \n"
47311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    READYUV422
47411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    YUV422TORGB
47511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "subs       %4, %4, #8                     \n"
47611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vmov.u8    d23, #255                      \n"
47711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB4444
47811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB4444.
47911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "bgt        1b                             \n"
48011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "+r"(src_y),    // %0
48111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_u),    // %1
48211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(src_v),    // %2
48311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(dst_argb4444),  // %3
48411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "+r"(width)     // %4
48511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "r"(&kUVToRB),  // %5
48611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "r"(&kUVToG)    // %6
48711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
48811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
48911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com  );
49011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com}
49111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com
49200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid YToARGBRow_NEON(const uint8* src_y,
49300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                     uint8* dst_argb,
49400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                     int width) {
49500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  asm volatile (
4962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%3]                    \n"
4972c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%4]                    \n"
49800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d26, #128                      \n"
49900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u16   q14, #74                       \n"
50000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u16   q15, #16                       \n"
501c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
50200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  "1:                                          \n"
50300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    READYUV400
50400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    YUV422TORGB
50500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "subs       %2, %2, #8                     \n"
50600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d23, #255                      \n"
50700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
50800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "bgt        1b                             \n"
50900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "+r"(src_y),     // %0
51000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(dst_argb),  // %1
51100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(width)      // %2
51200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "r"(&kUVToRB),   // %3
51300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "r"(&kUVToG)     // %4
51400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
51500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
51600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  );
51700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com}
51800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com
51900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid I400ToARGBRow_NEON(const uint8* src_y,
52000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                        uint8* dst_argb,
52100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com                        int width) {
52200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  asm volatile (
523c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
52400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov.u8    d23, #255                      \n"
52500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  "1:                                          \n"
5262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d20}, [%0]!                   \n"
52700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov       d21, d20                       \n"
52800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vmov       d22, d20                       \n"
52900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "subs       %2, %2, #8                     \n"
53000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
53100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    "bgt        1b                             \n"
53200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "+r"(src_y),     // %0
53300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(dst_argb),  // %1
53400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com      "+r"(width)      // %2
53500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    :
53600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com    : "cc", "memory", "d20", "d21", "d22", "d23"
53700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com  );
53800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com}
53900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com
5409de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV12ToARGBRow_NEON(const uint8* src_y,
5419de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_uv,
5429de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_argb,
5434807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com                        int width) {
5444807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  asm volatile (
5452c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
5462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
5474807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d26, #128                      \n"
5484807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q14, #74                       \n"
5494807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q15, #16                       \n"
550c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
5514807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  "1:                                          \n"
5524807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READNV12
5534807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    YUV422TORGB
5544807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #8                     \n"
5554807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d23, #255                      \n"
5564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
5574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "bgt        1b                             \n"
5589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
5599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_uv),    // %1
5609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_argb),  // %2
5619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %3
5629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %4
5639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %5
56464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
56564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
5664807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  );
5674807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
5684807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
5699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV21ToARGBRow_NEON(const uint8* src_y,
5709de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_uv,
5719de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_argb,
5724807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com                        int width) {
5734807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  asm volatile (
5742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
5752c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
5764807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d26, #128                      \n"
5774807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q14, #74                       \n"
5784807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u16   q15, #16                       \n"
579c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
5804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  "1:                                          \n"
5814807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    READNV21
5824807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    YUV422TORGB
5834807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #8                     \n"
5844807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vmov.u8    d23, #255                      \n"
5854807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"
5864807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "bgt        1b                             \n"
5879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "+r"(src_y),     // %0
5889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(src_uv),    // %1
5899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(dst_argb),  // %2
5909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "+r"(width)      // %3
5919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    : "r"(&kUVToRB),   // %4
5929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com      "r"(&kUVToG)     // %5
59364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
59464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
5954807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  );
5964807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
5974807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Converts one row of NV12 input (luma row src_y plus chroma row src_uv) to
// RGB565 in dst_rgb565, 8 pixels (16 output bytes) per loop iteration.
// width is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
// READNV12, YUV422TORGB and ARGBTORGB565 are macros defined elsewhere in this
// file; the constants set up below are presumably the inputs YUV422TORGB
// expects - verify against the macro definitions.
5989f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV12ToRGB565Row_NEON(const uint8* src_y,
599bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          const uint8* src_uv,
600bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          uint8* dst_rgb565,
601bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          int width) {
6029f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  asm volatile (
    // d24 / d25 <- 8 bytes each from kUVToRB / kUVToG.
6032c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
6042c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
    // 128 in each byte of d26; 74 and 16 in each 16 bit lane of q14 / q15.
6059f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u8    d26, #128                      \n"
6069f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q14, #74                       \n"
6079f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q15, #16                       \n"
608c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
6099f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  "1:                                          \n"
6109f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    READNV12
6119f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    YUV422TORGB
    // Flags set here are consumed by the bgt below; the NEON instructions in
    // between are expected not to alter them.
6129f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "subs       %3, %3, #8                     \n"
6139f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    ARGBTORGB565
6149f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
6159f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "bgt        1b                             \n"
6169f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "+r"(src_y),     // %0
6179f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(src_uv),    // %1
6189f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(dst_rgb565),  // %2
6199f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(width)      // %3
6209f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "r"(&kUVToRB),   // %4
6219f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "r"(&kUVToG)     // %5
6229f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
6239f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
6249f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  );
6259f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com}
6269f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com
// Converts one row of NV21 input (luma row src_y plus chroma row src_uv) to
// RGB565 in dst_rgb565, 8 pixels (16 output bytes) per loop iteration.
// Identical to the NV12 variant except that READNV21 is used to fetch the
// source; READNV21 is defined elsewhere in this file and presumably accounts
// for the NV21 chroma byte order - verify against the macro.
// width is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
6279f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV21ToRGB565Row_NEON(const uint8* src_y,
628bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          const uint8* src_uv,
629bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          uint8* dst_rgb565,
630bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com                          int width) {
6319f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  asm volatile (
    // d24 / d25 <- 8 bytes each from kUVToRB / kUVToG.
6322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%4]                    \n"
6332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%5]                    \n"
    // 128 in each byte of d26; 74 and 16 in each 16 bit lane of q14 / q15.
6349f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u8    d26, #128                      \n"
6359f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q14, #74                       \n"
6369f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vmov.u16   q15, #16                       \n"
637c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
6389f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  "1:                                          \n"
6399f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    READNV21
6409f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    YUV422TORGB
    // Flags set here are consumed by the bgt below; the NEON instructions in
    // between are expected not to alter them.
6419f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "subs       %3, %3, #8                     \n"
6429f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    ARGBTORGB565
6439f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
6449f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    "bgt        1b                             \n"
6459f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "+r"(src_y),     // %0
6469f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(src_uv),    // %1
6479f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(dst_rgb565),  // %2
6489f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "+r"(width)      // %3
6499f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "r"(&kUVToRB),   // %4
6509f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "r"(&kUVToG)     // %5
6519f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
6529f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
6539f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com  );
6549f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com}
6559f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com
// Converts one row of packed YUY2 (src_yuy2) to ARGB (dst_argb), 8 pixels
// (32 output bytes) per loop iteration.
// width is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
// READYUY2 and YUV422TORGB are macros defined elsewhere in this file;
// YUV422TORGB presumably leaves the three colour channels in d20, d21, d22,
// which are stored below together with the alpha in d23 - verify against the
// macro.
656793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid YUY2ToARGBRow_NEON(const uint8* src_yuy2,
657793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        uint8* dst_argb,
658793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        int width) {
659793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  asm volatile (
    // d24 / d25 <- 8 bytes each from kUVToRB / kUVToG.
6602c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%3]                    \n"
6612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%4]                    \n"
    // 128 in each byte of d26; 74 and 16 in each 16 bit lane of q14 / q15.
662793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d26, #128                      \n"
663793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q14, #74                       \n"
664793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q15, #16                       \n"
665c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
666793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  "1:                                          \n"
667793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    READYUY2
668793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    YUV422TORGB
669793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "subs       %2, %2, #8                     \n"
    // Alpha is forced to 255 for every pixel, then the four byte planes
    // d20..d23 are interleaved into 8 four-byte pixels by vst4.8.
670793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d23, #255                      \n"
671793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
672793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "bgt        1b                             \n"
673793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "+r"(src_yuy2),  // %0
674793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(dst_argb),  // %1
675793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(width)      // %2
676793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "r"(&kUVToRB),   // %3
677793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "r"(&kUVToG)     // %4
678793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
679793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
680793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  );
681793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com}
682793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
// Converts one row of packed UYVY (src_uyvy) to ARGB (dst_argb), 8 pixels
// (32 output bytes) per loop iteration.
// Identical to the YUY2 variant except that READUYVY is used to fetch the
// source. READUYVY and YUV422TORGB are macros defined elsewhere in this file;
// YUV422TORGB presumably leaves the three colour channels in d20, d21, d22 -
// verify against the macro.
// width is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
683793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid UYVYToARGBRow_NEON(const uint8* src_uyvy,
684793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        uint8* dst_argb,
685793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com                        int width) {
686793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  asm volatile (
    // d24 / d25 <- 8 bytes each from kUVToRB / kUVToG.
6872c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d24}, [%3]                    \n"
6882c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d25}, [%4]                    \n"
    // 128 in each byte of d26; 74 and 16 in each 16 bit lane of q14 / q15.
689793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d26, #128                      \n"
690793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q14, #74                       \n"
691793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u16   q15, #16                       \n"
692c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
693793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  "1:                                          \n"
694793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    READUYVY
695793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    YUV422TORGB
696793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "subs       %2, %2, #8                     \n"
    // Alpha is forced to 255 for every pixel, then the four byte planes
    // d20..d23 are interleaved into 8 four-byte pixels by vst4.8.
697793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vmov.u8    d23, #255                      \n"
698793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
699793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    "bgt        1b                             \n"
700793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "+r"(src_uyvy),  // %0
701793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(dst_argb),  // %1
702793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "+r"(width)      // %2
703793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "r"(&kUVToRB),   // %3
704793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "r"(&kUVToG)     // %4
705793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com    : "cc", "memory", "q0", "q1", "q2", "q3",
706793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
707793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com  );
708793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com}
709793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com
7104a86a836fcde981b6c3fd3f4a216a3253a2d26bcfbarchard@google.com// Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
// Each loop iteration consumes 32 bytes from src_uv and produces 16 bytes in
// each of dst_u and dst_v; all three pointers are post-incremented.
// width is the number of UV pairs. It is reduced by 16 per pass and the loop
// repeats while the remainder is > 0; the body runs before the first test, so
// one pass always executes (presumably callers pass a positive multiple of
// 16 - TODO confirm).
711f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
712f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com                     int width) {
7135b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
714c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
7152d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com  "1:                                          \n"
    // vld2.8 de-interleaves while loading: even bytes -> q0, odd bytes -> q1.
7162c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pairs of UV
7174807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
7182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store U
7192c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%2]!                    \n"  // store V
72018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
7218536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com    : "+r"(src_uv),  // %0
7228536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com      "+r"(dst_u),   // %1
7238536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com      "+r"(dst_v),   // %2
72416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com      "+r"(width)    // %3  // Output registers
7252d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com    :                       // Input registers
7268f506332af217882648eed166a257557855b9fdbfbarchard@google.com    : "cc", "memory", "q0", "q1"  // Clobber List
7272d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com  );
7282d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com}
7292d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com
73062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com// Reads 16 U's and V's and writes out 16 pairs of UV.
// Each loop iteration consumes 16 bytes from each of src_u and src_v and
// produces 32 interleaved bytes in dst_uv; all three pointers are
// post-incremented.
// width is the number of UV pairs. It is reduced by 16 per pass and the loop
// repeats while the remainder is > 0; the body runs before the first test, so
// one pass always executes (presumably callers pass a positive multiple of
// 16 - TODO confirm).
731f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
732f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com                     int width) {
73362a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com  asm volatile (
734c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
73562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com  "1:                                          \n"
7362c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load U
7372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // load V
73862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
    // vst2 interleaves while storing: q0[0], q1[0], q0[1], q1[1], ...
739d26b4514726a9a7476f6dfb6730cda2b422bf550fbarchard@google.com    "vst2.u8    {q0, q1}, [%2]!                \n"  // store 16 pairs of UV
74062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    "bgt        1b                             \n"
74162a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    :
74262a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(src_u),   // %0
74362a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(src_v),   // %1
74462a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(dst_uv),  // %2
74562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com      "+r"(width)    // %3  // Output registers
74662a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    :                       // Input registers
7478f506332af217882648eed166a257557855b9fdbfbarchard@google.com    : "cc", "memory", "q0", "q1"  // Clobber List
74862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com  );
74962a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com}
750834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com
7512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com// Copy multiple of 32 bytes from src to dst using vld1.8 / vst1.8.
// NOTE(review): this comment previously named vld4.8 ("allow unaligned and is
// fastest on a15") while the code uses vld1.8; confirm whether the
// performance remark still applies to the instruction actually used.
// count is the byte count. It is reduced by 32 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes.
75219932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.comvoid CopyRow_NEON(const uint8* src, uint8* dst, int count) {
7535b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
754c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
75519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com  "1:                                          \n"
7562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 32
75762a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com    "subs       %2, %2, #32                    \n"  // 32 processed per loop
7582c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 32
75918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com    "bgt        1b                             \n"
7603e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src),   // %0
7613e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst),   // %1
7623e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(count)  // %2  // Output registers
7633e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :                     // Input registers
  // d0..d3 used above are the two halves of q0 and q1.
7648f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
76519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com  );
76619932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com}
76719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com
76864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow_NEON writes 'count' bytes using a 32 bit value repeated.
// v32 is replicated into the four 32 bit lanes of q0 and 16 bytes are stored
// per loop iteration; dst is post-incremented.
// count is reduced by 16 per pass and the loop repeats while the remainder is
// > 0; the body runs before the first test, so at least 16 bytes are always
// written and a count that is not a multiple of 16 is rounded up (presumably
// callers guarantee the destination has room - TODO confirm).
769f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SetRow_NEON(uint8* dst, uint32 v32, int count) {
7703e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  asm volatile (
7714807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vdup.u32  q0, %2                          \n"  // duplicate 4 ints
7724807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "1:                                        \n"
7734807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs      %1, %1, #16                     \n"  // 16 bytes per loop
7742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8    {q0}, [%0]!                     \n"  // store
7754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "bgt       1b                              \n"
7763e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(dst),   // %0
7773e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(count)  // %1
7783e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "r"(v32)     // %2
7798f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0"
7803e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
7814807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
7824807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
7834807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// TODO(fbarchard): Make fully assembler
78464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow32 writes 'count' words using a 32 bit value repeated.
785f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
7861b40d8caa2811759aa5fa87f2e23061d26f8968cfbarchard@google.com                      int dst_stride, int height) {
7874807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  for (int y = 0; y < height; ++y) {
788f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com    SetRow_NEON(dst, v32, width << 2);
7894807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    dst += dst_stride;
7904807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com  }
7914807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com}
7924807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Writes the bytes of the src row to dst in reverse order, 16 bytes per loop
// iteration. src is walked backwards from its last 16 bytes while dst is
// walked forwards.
// width is the byte count. It is reduced by 16 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 16 -
// TODO confirm).
79316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.comvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
7945b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
7953e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    // Start at end of source row.
    // r3 holds the post-index step (-16); src becomes src + width - 16.
7963e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "mov        r3, #-16                       \n"
7973e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "add        %0, %0, %2                     \n"
7983e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "sub        %0, #16                        \n"
7993e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
800c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
80182069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com  "1:                                          \n"
8023e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
8033e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "subs       %2, #16                        \n"  // 16 pixels per loop.
    // vrev64.8 reverses the bytes inside each 64 bit half; storing d1 before
    // d0 swaps the halves, completing the 16 byte reversal.
8043e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vrev64.8   q0, q0                         \n"
8053e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
8063e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"
8073e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "bgt        1b                             \n"
8083e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src),   // %0
8093e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst),   // %1
8103e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)  // %2
8113e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
8128f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "r3", "q0"
81316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com  );
81416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com}
81516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com
// Mirrors a row of interleaved UV pairs and splits it at the same time:
// src_uv is read backwards, even bytes go to dst_u and odd bytes to dst_v,
// each in reversed order. 8 pairs (16 source bytes) are handled per loop
// iteration.
// width is the number of UV pairs. It is reduced by 8 per pass and the loop
// repeats while the remainder is > 0; the body runs before the first test, so
// one pass always executes (presumably callers pass a positive multiple of
// 8 - TODO confirm).
816752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.comvoid MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
817752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com                      int width) {
8185b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com  asm volatile (
8193e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    // Start at end of source row.
    // r12 holds the post-index step (-16); src_uv becomes
    // src_uv + width * 2 - 16.
820752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com    "mov        r12, #-16                      \n"
8213e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "add        %0, %0, %3, lsl #1             \n"
8223e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "sub        %0, #16                        \n"
8233e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
824c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
82582069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com  "1:                                          \n"
    // vld2.8 de-interleaves while loading: even bytes -> d0, odd bytes -> d1.
826752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com    "vld2.8     {d0, d1}, [%0], r12            \n"  // src -= 16
8273e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "subs       %3, #8                         \n"  // 8 pixels per loop.
    // Reverses the 8 bytes inside d0 and inside d1 independently.
8283e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vrev64.8   q0, q0                         \n"
8293e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // dst += 8
8303e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"
8313e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "bgt        1b                             \n"
8323e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_uv),  // %0
8333e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_u),   // %1
8343e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_v),   // %2
8353e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)    // %3
8363e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
8378f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "r12", "q0"
83816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com  );
83916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com}
8403e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
// Writes the 4 byte pixels of the src row to dst in reverse pixel order (the
// bytes inside each pixel keep their order), 4 pixels (16 bytes) per loop
// iteration. src is walked backwards from its last 16 bytes while dst is
// walked forwards.
// width is the pixel count. It is reduced by 4 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 4 -
// TODO confirm).
8413e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.comvoid ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
8423e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  asm volatile (
8433e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    // Start at end of source row.
    // r3 holds the post-index step (-16); src becomes src + width * 4 - 16.
8443e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "mov        r3, #-16                       \n"
8453e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "add        %0, %0, %2, lsl #2             \n"
8463e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "sub        %0, #16                        \n"
8473e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com
848c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
8493e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  "1:                                          \n"
8503e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vld1.8     {q0}, [%0], r3                 \n"  // src -= 16
8513e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "subs       %2, #4                         \n"  // 4 pixels per loop.
    // vrev64.32 swaps the two 32 bit pixels inside each 64 bit half; storing
    // d1 before d0 swaps the halves, completing the 4 pixel reversal.
8523e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vrev64.32  q0, q0                         \n"
8533e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // dst += 16
8543e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"
8553e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "bgt        1b                             \n"
8563e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src),   // %0
8573e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst),   // %1
8583e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)  // %2
8593e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
8608f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "r3", "q0"
8613e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
8623e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com}
86316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com
// Expands a row of 3 byte RGB24 pixels to 4 byte ARGB pixels: the three
// source bytes of each pixel are kept in the same order and a fourth byte of
// 255 (alpha) is appended. 8 pixels (24 bytes in, 32 bytes out) are handled
// per loop iteration.
// pix is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
864797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
865797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  asm volatile (
866275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vmov.u8    d4, #255                       \n"  // Alpha
867c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
868797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  "1:                                          \n"
    // vld3.8 de-interleaves the three channels into d1, d2, d3; vst4.8
    // re-interleaves them with the constant alpha plane in d4.
869275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RGB24.
87082069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
8714807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
872797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "bgt        1b                             \n"
873797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  : "+r"(src_rgb24),  // %0
874dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_argb),   // %1
875dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)         // %2
876797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  :
8778f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
878797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  );
879797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com}
880797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com
// Expands a row of 3 byte RAW pixels to 4 byte ARGB pixels: the first and
// third source bytes of each pixel are exchanged, the middle byte is kept,
// and a fourth byte of 255 (alpha) is appended. 8 pixels (24 bytes in,
// 32 bytes out) are handled per loop iteration.
// pix is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
881797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
882797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  asm volatile (
883275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vmov.u8    d4, #255                       \n"  // Alpha
884c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
885797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  "1:                                          \n"
    // vld3.8 de-interleaves the three channels into d1, d2, d3.
886275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RAW.
8874807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
888275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vswp.u8    d1, d3                         \n"  // swap R, B
889275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
890797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "bgt        1b                             \n"
891dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_raw),   // %0
892797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "+r"(dst_argb),  // %1
893797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com    "+r"(pix)        // %2
894797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  :
8958f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
896797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com  );
897797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com}
898797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com
// Assembly fragment that unpacks 8 RGB565 pixels into three 8 bit planes.
//   Input:   q0 = 8 pixels, 16 bits each (blue in bits 0-4, green in bits
//            5-10, red in bits 11-15, per the shifts below).
//   Output:  d0 = blue, d1 = green, d2 = red, one byte per pixel.
//   Scratch: d4, d5 (q2) and d6.
// Each 5 or 6 bit field is widened to 8 bits by shifting it to the top of the
// byte and OR-ing its own most significant bits into the vacated low bits.
// d3 is not touched, so the caller can keep an alpha plane there.
899bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com#define RGB565TOARGB                                                           \
900f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshrn.u16  d6, q0, #5                     \n"  /* G xxGGGGGG           */ \
901f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB RRRRRxxx */ \
902f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshl.u8    d6, d6, #2                     \n"  /* G GGGGGG00 upper 6   */ \
903f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshr.u8    d1, d1, #3                     \n"  /* R 000RRRRR lower 5   */ \
904f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
905f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
906f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
907f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vshr.u8    d4, d6, #6                     \n"  /* G 000000GG lower 2   */ \
908f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
909f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
910bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 16 bit RGB565 pixels to 4 byte ARGB pixels with alpha set
// to 255. 8 pixels (16 bytes in, 32 bytes out) are handled per loop
// iteration.
// pix is the pixel count. It is reduced by 8 per pass and the loop repeats
// while the remainder is > 0; the body runs before the first test, so one
// pass always executes (presumably callers pass a positive multiple of 8 -
// TODO confirm).
911bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
912bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
    // d3 is set once outside the loop; RGB565TOARGB does not write it.
913bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d3, #255                       \n"  // Alpha
914c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
915bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
9164b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
917bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    // Leaves blue in d0, green in d1 and red in d2.
918bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    RGB565TOARGB
919bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
920bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
921bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgb565),  // %0
922bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_argb),    // %1
923bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)          // %2
924bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
9258f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
926bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
927bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
928bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Expands 8 ARGB1555 pixels held in q0 (one 16-bit lane per pixel) into
// 8-bit channel planes: d0 = B, d1 = G, d2 = R, d3 = A.
// q2 and q3 are used as scratch.
// Each 5-bit color is shifted to the top of its byte and its upper 3 bits are
// OR-ed into the low bits; the 1-bit alpha is negated to give 0x00 or 0xFF.
// NOTE: the last instruction line ends with a continuation backslash, so the
// blank line that follows is part of the macro and must stay blank.
9294b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB1555TOARGB                                                         \
9304b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshrn.u16  d7, q0, #8                     \n"  /* A Arrrrrxx           */ \
9314b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    d6, d7, #2                     \n"  /* R xxxRRRRR           */ \
9324b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshrn.u16  d5, q0, #5                     \n"  /* G xxxGGGGG           */ \
9334b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vmovn.u16  d4, q0                         \n"  /* B xxxBBBBB           */ \
9344b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    d7, d7, #7                     \n"  /* A 0000000A           */ \
9354b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vneg.s8    d7, d7                         \n"  /* A AAAAAAAA upper 8   */ \
9364b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    d6, d6, #3                     \n"  /* R RRRRR000 upper 5   */ \
9374b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q1, q3, #5                     \n"  /* R,A 00000RRR lower 3 */ \
9384b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    q0, q2, #3                     \n"  /* B,G BBBBB000 upper 5 */ \
9394b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q2, q0, #5                     \n"  /* B,G 00000BBB lower 3 */ \
9404b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q1, q1, q3                     \n"  /* R,A                  */ \
9414b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q0, q0, q2                     \n"  /* B,G                  */ \
9424b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
943522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
// Input: 8 pixels in q0. Output planes: d0 = B, d1 = G, d2 = R.
// d3 is never written here, so the caller has to provide the alpha plane.
// Scratch registers: d4, d5 (q2) and d6.
944522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com#define RGB555TOARGB                                                           \
945522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshrn.u16  d6, q0, #5                     \n"  /* G xxxGGGGG           */ \
946522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB xRRRRRxx */ \
947522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshl.u8    d6, d6, #3                     \n"  /* G GGGGG000 upper 5   */ \
948522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshr.u8    d1, d1, #2                     \n"  /* R 00xRRRRR lower 5   */ \
949522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
950522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
951522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
952522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vshr.u8    d4, d6, #5                     \n"  /* G 00000GGG lower 3   */ \
953522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
954522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
955522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
// Converts a row of ARGB1555 pixels to ARGB.
// Each pass loads 16 bytes (8 pixels), expands them with ARGB1555TOARGB and
// stores 32 bytes. Alpha comes from the source pixel: the macro rewrites d3,
// so the 255 preloaded before the loop never reaches the output.
// The counter is reduced by 8 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 8.
9564b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
9574b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com                            int pix) {
9584b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  asm volatile (
9594b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vmov.u8    d3, #255                       \n"  // Alpha (overwritten by ARGB1555TOARGB)
960c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
9614b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  "1:                                          \n"
9624b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
9634b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
9644b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    ARGB1555TOARGB
9654b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
9664b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "bgt        1b                             \n"
9674b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  : "+r"(src_argb1555),  // %0
9684b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(dst_argb),    // %1
9694b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(pix)          // %2
9704b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  :
9718f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
9724b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  );
9734b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com}
9744b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Expands 8 ARGB4444 pixels held in q0 into 8-bit channel planes by copying
// every 4-bit nibble into both halves of its output byte.
// Output planes: d0 = B, d1 = G, d2 = R, d3 = A. q2 is used as scratch.
9754b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB4444TOARGB                                                         \
9764b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vuzp.u8    d0, d1                         \n"  /* d0 BG, d1 RA         */ \
9774b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    q2, q0, #4                     \n"  /* B,R BBBB0000         */ \
9784b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q1, q0, #4                     \n"  /* G,A 0000GGGG         */ \
9794b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshr.u8    q0, q2, #4                     \n"  /* B,R 0000BBBB         */ \
9804b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q0, q0, q2                     \n"  /* B,R BBBBBBBB         */ \
9814b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vshl.u8    q2, q1, #4                     \n"  /* G,A GGGG0000         */ \
9824b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vorr.u8    q1, q1, q2                     \n"  /* G,A GGGGGGGG         */ \
9834b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vswp.u8    d1, d2                         \n"  /* B,R,G,A -> B,G,R,A   */
9844b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Converts a row of ARGB4444 pixels to ARGB.
// Each pass loads 16 bytes (8 pixels), expands them with ARGB4444TOARGB and
// stores 32 bytes. Alpha comes from the source pixel: the macro rewrites d3,
// so the 255 preloaded before the loop never reaches the output.
// The counter is reduced by 8 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 8.
9854b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
9864b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com                            int pix) {
9874b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  asm volatile (
9884b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vmov.u8    d3, #255                       \n"  // Alpha (overwritten by ARGB4444TOARGB)
989c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
9904b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  "1:                                          \n"
9914b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
9924b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
9934b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    ARGB4444TOARGB
9944b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
9954b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "bgt        1b                             \n"
9964b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  : "+r"(src_argb4444),  // %0
9974b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(dst_argb),    // %1
9984b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com    "+r"(pix)          // %2
9994b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  :
10008f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
10014b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com  );
10024b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com}
10034b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com
// Converts a row of ARGB to RGB24 by dropping the fourth byte of each pixel.
// vld4.8 de-interleaves 8 pixels (32 bytes) into the byte planes d1..d4 and
// vst3.8 writes the first three planes back interleaved (24 bytes), keeping
// their order.
// The counter is reduced by 8 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 8.
100464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.comvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
100564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  asm volatile (
1006c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
100764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  "1:                                          \n"
1008275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
100982069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
10104807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RGB24.
101164961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com    "bgt        1b                             \n"
101264961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  : "+r"(src_argb),   // %0
101364961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com    "+r"(dst_rgb24),  // %1
101464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com    "+r"(pix)         // %2
101564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  :
10168f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
101764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com  );
101864961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com}
101964961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com
// Converts a row of ARGB to RAW: the fourth byte of each pixel is dropped and
// the first and third bytes trade places.
// vld4.8 de-interleaves 8 pixels (32 bytes) into the byte planes d1..d4, d1
// and d3 are swapped, and vst3.8 writes d1..d3 back interleaved (24 bytes).
// The counter is reduced by 8 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 8.
10205808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.comvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
10215808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  asm volatile (
1022c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
10235808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  "1:                                          \n"
1024275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
10254807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1026275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vswp.u8    d1, d3                         \n"  // swap R, B
1027275913410d879df91edd1648dfee19207870264bfbarchard@google.com    "vst3.8     {d1, d2, d3}, [%1]!            \n"  // store 8 pixels of RAW.
10285808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com    "bgt        1b                             \n"
10295808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  : "+r"(src_argb),  // %0
10305808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com    "+r"(dst_raw),   // %1
10315808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com    "+r"(pix)        // %2
10325808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  :
10338f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d1", "d2", "d3", "d4"  // Clobber List
10345808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com  );
10355808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com}
10365808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com
// Extracts the Y samples from a row of YUY2.
// vld2.8 splits 32 source bytes into even-indexed bytes (q0) and odd-indexed
// bytes (q1); the even-indexed bytes are stored as 16 Y values.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1037dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
1038dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1039c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1040dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
10412c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of YUY2.
104282069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
10432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 16 pixels of Y.
1044dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1045dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_yuy2),  // %0
1046dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_y),     // %1
1047dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %2
1048dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
10498f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
1050dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1051dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1052dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Extracts the Y samples from a row of UYVY.
// vld2.8 splits 32 source bytes into even-indexed bytes (q0) and odd-indexed
// bytes (q1); the odd-indexed bytes are stored as 16 Y values.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1053dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
1054dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1055c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1056dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
10572c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"  // load 16 pixels of UYVY.
105882069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
10592c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%1]!                    \n"  // store 16 pixels of Y.
1060dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1061dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_uyvy),  // %0
1062dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_y),     // %1
1063dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %2
1064dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
10658f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
1066dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1067dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1068dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Splits the chroma of one YUY2 row into separate U and V rows.
// vld4.8 de-interleaves 32 source bytes into four planes d0..d3 by byte
// position within each 4-byte group; plane 1 (d1) is written to dst_u and
// plane 3 (d3) to dst_v, 8 bytes each per pass.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1069dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
1070dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                         int pix) {
1071dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1072c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1073dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
1074dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
10754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
10762c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // store 8 U.
10772c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d3}, [%2]!                    \n"  // store 8 V.
1078dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1079dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_yuy2),  // %0
1080dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_u),     // %1
1081dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_v),     // %2
1082dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %3
1083dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
10848f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
1085dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1086dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1087dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Splits the chroma of one UYVY row into separate U and V rows.
// vld4.8 de-interleaves 32 source bytes into four planes d0..d3 by byte
// position within each 4-byte group; plane 0 (d0) is written to dst_u and
// plane 2 (d2) to dst_v, 8 bytes each per pass.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1088dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
1089dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                         int pix) {
1090dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1091c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1092dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
1093dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
10944807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
10952c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 U.
10962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d2}, [%2]!                    \n"  // store 8 V.
1097dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1098dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  : "+r"(src_uyvy),  // %0
1099dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_u),     // %1
1100dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(dst_v),     // %2
1101dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(pix)        // %3
1102dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
11038f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"  // Clobber List
1104dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1105dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1106dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Produces U and V rows from two vertically adjacent YUY2 rows.
// The first instruction turns operand %1 (stride_yuy2) into the address of
// the second row, src_yuy2 + stride_yuy2. Each pass de-interleaves 32 bytes
// from both rows, combines planes 1 (U) and 3 (V) of the two rows with a
// rounding halving add, and writes 8 U and 8 V bytes.
// NOTE(review): storing an address in the int stride operand presumably relies
// on int and pointers sharing a register width on the targeted ARM ABI.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1107dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
1108dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
1109dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1110dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "add        %1, %0, %1                     \n"  // stride + src_yuy2
1111c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1112dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
1113dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of YUY2.
11144807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
1115dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row YUY2.
1116dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d1, d1, d5                     \n"  // average rows of U
1117dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d3, d3, d7                     \n"  // average rows of V
11182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 U.
11192c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d3}, [%3]!                    \n"  // store 8 V.
1120dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1121cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com  : "+r"(src_yuy2),     // %0
1122dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(stride_yuy2),  // %1
1123cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_u),        // %2
1124cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_v),        // %3
1125cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(pix)           // %4
1126dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
11278f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
1128dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1129dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
1130dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com
// Produces U and V rows from two vertically adjacent UYVY rows.
// The first instruction turns operand %1 (stride_uyvy) into the address of
// the second row, src_uyvy + stride_uyvy. Each pass de-interleaves 32 bytes
// from both rows, combines planes 0 (U) and 2 (V) of the two rows with a
// rounding halving add, and writes 8 U and 8 V bytes.
// NOTE(review): storing an address in the int stride operand presumably relies
// on int and pointers sharing a register width on the targeted ARM ABI.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1131dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
1132dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
1133dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  asm volatile (
1134dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "add        %1, %0, %1                     \n"  // stride + src_uyvy
1135c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1136dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  "1:                                          \n"
1137dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 16 pixels of UYVY.
11384807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
1139dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load next row UYVY.
1140dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d0, d0, d4                     \n"  // average rows of U
1141dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "vrhadd.u8  d2, d2, d6                     \n"  // average rows of V
11422c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 U.
11432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d2}, [%3]!                    \n"  // store 8 V.
1144dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "bgt        1b                             \n"
1145cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com  : "+r"(src_uyvy),     // %0
1146dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com    "+r"(stride_uyvy),  // %1
1147cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_u),        // %2
1148cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(dst_v),        // %3
1149cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com    "+r"(pix)           // %4
1150dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  :
11518f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"  // Clobber List
1152dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com  );
1153dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com}
11544807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com
// Averages two rows byte-by-byte into dst_uv using a rounding halving add.
// The first instruction adds src_uv to operand %1 (src_uv_stride), so %1
// becomes the address of the second row. Each pass reads 16 bytes from each
// row and writes 16 bytes.
// NOTE(review): storing an address in the int stride operand presumably relies
// on int and pointers sharing a register width on the targeted ARM ABI.
// The counter is reduced by 16 per pass and the loop repeats while it is
// still > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 16.
1155e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.comvoid HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
1156e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com                  uint8* dst_uv, int pix) {
1157e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com  asm volatile (
1158e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    // change the stride to row 2 pointer
1159e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "add        %1, %0                         \n"
1160f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com  "1:                                          \n"
11612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load row 1 16 pixels.
1162e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
11632c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // load row 2 16 pixels.
1164e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "vrhadd.u8  q0, q1                         \n"  // average row 1 and 2
11652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"
1166e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com    "bgt        1b                             \n"
11673e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_uv),         // %0
11683e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_uv_stride),  // %1
11693e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_uv),         // %2
11703e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(pix)             // %3
11713e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
11728f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
11733e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
1174e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com}
1175e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com
11768d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
// The 32-bit selector is placed in the low half of d6 and used as four byte
// indices for vtbl.8 into each 16-byte group (4 ARGB pixels). Each pass reads
// 32 bytes (8 pixels), picks 4 bytes from each half, merges the two low
// 32-bit results with vtrn.u32 and stores 8 bytes.
// The upper half of d6 is never initialized; the table results it produces
// end up in d5 after the vtrn and are not stored.
// The counter is reduced by 8 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 8.
11771096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
11781096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com                         uint32 selector, int pix) {
1179f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com  asm volatile (
1180c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u32   d6[0], %3                      \n"  // selector
1181f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com  "1:                                          \n"
11822c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 8 pixels.
1183c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop
1184c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vtbl.8     d4, {d0, d1}, d6               \n"  // look up 4 pixels
1185c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vtbl.8     d5, {d2, d3}, d6               \n"  // look up 4 pixels
1186c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vtrn.u32   d4, d5                         \n"  // combine 8 pixels
11872c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d4}, [%1]!                    \n"  // store 8.
1188f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com    "bgt        1b                             \n"
11894f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com  : "+r"(src_argb),   // %0
11904f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com    "+r"(dst_bayer),  // %1
11914f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com    "+r"(pix)         // %2
11924f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com  : "r"(selector)     // %3
1193c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
11941096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  );
11951096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com}
11961096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com
119708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com// Select G channels from ARGB.  e.g.  GGGGGGGG
// The selector parameter is unnamed and unused; it is presumably kept so the
// signature matches ARGBToBayerRow_NEON.
// vld4.8 de-interleaves 8 pixels (32 bytes) into byte planes d0..d3 and only
// plane 1 (d1) is stored, 8 bytes per pass.
// The counter is reduced by 8 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 8.
119808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.comvoid ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
119908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com                           uint32 /*selector*/, int pix) {
120008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  asm volatile (
120108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  "1:                                          \n"
120208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load row 8 pixels.
120308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop
120408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "vst1.8     {d1}, [%1]!                    \n"  // store 8 G's.
120508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "bgt        1b                             \n"
120608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  : "+r"(src_argb),   // %0
120708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "+r"(dst_bayer),  // %1
120808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com    "+r"(pix)         // %2
120908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  :
121008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
121108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com  );
121208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com}
121308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com
12141096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
// Reorders the bytes of each 16-byte group (4 pixels) according to a table.
// 16 bytes are read from shuffler once, before the loop, into q2. Each pass
// then loads 16 source bytes and uses vtbl.8 with d4 and d5 as index vectors
// to build the low and high 8 output bytes, which are stored together.
// The counter is reduced by 4 per pass and the loop repeats while it is still
// > 0, so one pass always runs and pix is effectively rounded up to a
// multiple of 4.
12151096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
12161096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com                         const uint8* shuffler, int pix) {
12171096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  asm volatile (
12182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q2}, [%3]                     \n"  // shuffler
12191096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  "1:                                          \n"
12202c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 4 pixels.
12211096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "subs       %2, %2, #4                     \n"  // 4 processed per loop
12221096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "vtbl.8     d2, {d0, d1}, d4               \n"  // look up 2 first pixels
12231096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "vtbl.8     d3, {d0, d1}, d5               \n"  // look up 2 next pixels
12242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%1]!                    \n"  // store 4.
12251096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "bgt        1b                             \n"
12261096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  : "+r"(src_argb),  // %0
12271096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "+r"(dst_argb),  // %1
12281096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com    "+r"(pix)        // %2
12291096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  : "r"(shuffler)    // %3
12301096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
12313e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  );
12328d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com}
12338d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com
12349de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToYUY2Row_NEON(const uint8* src_y,
12359de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
12369de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
12379de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_yuy2, int width) {
12389de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  asm volatile (
1239c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
12409de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  "1:                                          \n"
12419de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld2.8     {d0, d2}, [%0]!                \n"  // load 16 Ys
12429de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d1}, [%1]!                    \n"  // load 8 Us
12439de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d3}, [%2]!                    \n"  // load 8 Vs
12449de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels
12452c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 YUY2/16 pixels.
12469de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "bgt        1b                             \n"
12473e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_y),     // %0
12483e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_u),     // %1
12493e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_v),     // %2
12503e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_yuy2),  // %3
12513e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)      // %4
12523e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
12533e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"
12549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  );
12559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com}
12569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com
12579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToUYVYRow_NEON(const uint8* src_y,
12589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_u,
12599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        const uint8* src_v,
12609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com                        uint8* dst_uyvy, int width) {
12619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  asm volatile (
1262c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
12639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  "1:                                          \n"
12649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld2.8     {d1, d3}, [%0]!                \n"  // load 16 Ys
12659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d0}, [%1]!                    \n"  // load 8 Us
12669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "vld1.8     {d2}, [%2]!                    \n"  // load 8 Vs
12679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels
12682c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%3]!        \n"  // Store 8 UYVY/16 pixels.
12699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com    "bgt        1b                             \n"
12703e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "+r"(src_y),     // %0
12713e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_u),     // %1
12723e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(src_v),     // %2
12733e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(dst_uyvy),  // %3
12743e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com    "+r"(width)      // %4
12753e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  :
12763e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3"
12779de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com  );
12789de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com}
12799de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com
12801bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
12811bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  asm volatile (
1282c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
12831bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  "1:                                          \n"
128411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
12851bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
128611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTORGB565
12871bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
12881bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "bgt        1b                             \n"
12891bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  : "+r"(src_argb),  // %0
12901bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(dst_rgb565),  // %1
12911bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(pix)        // %2
12921bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  :
12938f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
12941bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  );
12951bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com}
12961bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com
12971bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
12981bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com                            int pix) {
12991bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  asm volatile (
1300c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
13011bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  "1:                                          \n"
130211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
13031bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
130411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB1555
13051bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
13061bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "bgt        1b                             \n"
13071bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  : "+r"(src_argb),  // %0
13081bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(dst_argb1555),  // %1
13091bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com    "+r"(pix)        // %2
13101bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  :
13118f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
13121bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com  );
13131bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com}
13141bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com
1315bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.comvoid ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
1316bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.com                            int pix) {
1317c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  asm volatile (
1318c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
1319c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1320c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  "1:                                          \n"
132111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
1322c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
132311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com    ARGBTOARGB4444
1324c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
1325c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "bgt        1b                             \n"
1326bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_argb),      // %0
1327c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com    "+r"(dst_argb4444),  // %1
1328bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)            // %2
1329c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  :
13308f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
1331c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com  );
1332c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com}
13330908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com
13340908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.comvoid ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
13350908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  asm volatile (
1336bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
1337bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
1338bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
1339bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
1340c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1341bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
13421dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
1343bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1344bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
1345bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
1346bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
1347bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
1348bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
1349bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
1350bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
1351bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_argb),  // %0
1352bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),     // %1
1353bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
1354bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
13558f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
1356bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
1357bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
1358bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
1359cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.comvoid ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
1360cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  asm volatile (
1361050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
1362050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
1363050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
1364c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1365cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  "1:                                          \n"
1366cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
1367cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1368cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
1369cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
1370cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
1371050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit Y
1372cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
1373cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "bgt        1b                             \n"
1374cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  : "+r"(src_argb),  // %0
1375cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "+r"(dst_y),     // %1
1376cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com    "+r"(pix)        // %2
1377cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  :
1378cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
1379cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com  );
1380cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com}
1381cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com
1382c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 8x1 pixels.
1383c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.comvoid ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1384c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com                         int pix) {
1385c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  asm volatile (
1386c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d24, #112                      \n"  // UB / VR 0.875 coefficient
1387c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d25, #74                       \n"  // UG -0.5781 coefficient
1388c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d26, #38                       \n"  // UR -0.2969 coefficient
1389c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d27, #18                       \n"  // VB -0.1406 coefficient
1390c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u8    d28, #94                       \n"  // VG -0.7344 coefficient
1391c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1392c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1393c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  "1:                                          \n"
1394c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
1395c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
1396c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
1397c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q2, d1, d25                    \n"  // G
1398c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q2, d2, d26                    \n"  // R
1399c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vadd.u16   q2, q2, q15                    \n"  // +128 -> unsigned
1400c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
1401c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmull.u8   q3, d2, d24                    \n"  // R
1402c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q3, d1, d28                    \n"  // G
1403c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vmlsl.u8   q3, d0, d27                    \n"  // B
1404c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vadd.u16   q3, q3, q15                    \n"  // +128 -> unsigned
1405c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
1406c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d0, q2, #8                    \n"  // 16 bit to 8 bit U
1407c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d1, q3, #8                    \n"  // 16 bit to 8 bit V
1408c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
1409c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
1410c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
1411c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "bgt        1b                             \n"
1412c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  : "+r"(src_argb),  // %0
1413c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "+r"(dst_u),     // %1
1414c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "+r"(dst_v),     // %2
1415c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com    "+r"(pix)        // %3
1416c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  :
14178f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
1418c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com  );
1419c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com}
1420c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1421c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
1422c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.comvoid ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1423c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com                         int pix) {
1424c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  asm volatile (
1425c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
1426c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
1427c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
1428c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
142976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
1430c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1431c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1432c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  "1:                                          \n"
1433c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
1434c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
1435c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1436c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
1437c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
1438c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
1439c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1440c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
1441c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmul.s16   q8, q0, q10                    \n"  // B
1442c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q8, q1, q11                    \n"  // G
1443c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q8, q2, q12                    \n"  // R
1444c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
1445c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1446c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmul.s16   q9, q2, q10                    \n"  // R
1447c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q9, q1, q14                    \n"  // G
1448c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vmls.s16   q9, q0, q13                    \n"  // B
1449c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
1450c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1451c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
1452c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
1453c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com
1454c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
1455c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
1456c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "bgt        1b                             \n"
1457c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  : "+r"(src_argb),  // %0
1458c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "+r"(dst_u),     // %1
1459c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "+r"(dst_v),     // %2
1460c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "+r"(pix)        // %3
1461c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  :
14628f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3",
1463c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1464c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com  );
1465c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com}
1466c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com
146776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
146876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.comvoid ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
146976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com                         int pix) {
147076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  asm volatile (
147106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
147206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
147306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
147406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
147506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
147676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1477c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
147876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  "1:                                          \n"
147976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
148076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
148176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
148276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
148376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
148476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%0]!     \n"  // load 8 more ARGB pixels.
148576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%0]!     \n"  // load last 8 ARGB pixels.
148676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q4, q4                         \n"  // B 16 bytes -> 8 shorts.
148776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q5, q5                         \n"  // G 16 bytes -> 8 shorts.
148876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpaddl.u8  q6, q6                         \n"  // R 16 bytes -> 8 shorts.
148906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
149076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d0, d0, d1                     \n"  // B 16 shorts -> 8 shorts.
149176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d1, d8, d9                     \n"  // B
149276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d2, d2, d3                     \n"  // G 16 shorts -> 8 shorts.
149376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d3, d10, d11                   \n"  // G
149476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d4, d4, d5                     \n"  // R 16 shorts -> 8 shorts.
149576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vpadd.u16  d5, d12, d13                   \n"  // R
149606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
149706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
149806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
149906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
150006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
150176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "subs       %3, %3, #32                    \n"  // 32 processed per loop.
150276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmul.s16   q8, q0, q10                    \n"  // B
150376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q8, q1, q11                    \n"  // G
150476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q8, q2, q12                    \n"  // R
150576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
150676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmul.s16   q9, q2, q10                    \n"  // R
150776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q9, q1, q14                    \n"  // G
150876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmls.s16   q9, q0, q13                    \n"  // B
150976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
151076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
151176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
151276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
151376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
151476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "bgt        1b                             \n"
151576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  : "+r"(src_argb),  // %0
151676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "+r"(dst_u),     // %1
151776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "+r"(dst_v),     // %2
151876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "+r"(pix)        // %3
151976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  :
15208f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
152176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
152276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  );
152376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com}
152476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com
1525dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com// RGBTOUV(QB, QG, QR): 8 16-bit B/G/R lanes -> 8 U bytes in d0 and 8 V bytes in d1. Reads coefficients q10-q14 and bias q15; overwrites q8, q9, q0. Used by the 16x2 pixels -> 8x1 UV row functions; pix is number of argb pixels. e.g. 16.
152695730719503137a7db61a105bec02220f9ed159efbarchard@google.com#define RGBTOUV(QB, QG, QR) \
152795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmul.s16   q8, " #QB ", q10               \n"  /* U  = B * q10 (UB)    */ \
152895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q8, " #QG ", q11               \n"  /* U -= G * q11 (UG)    */ \
152995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q8, " #QR ", q12               \n"  /* U -= R * q12 (UR)    */ \
1530522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  /* U += q15: +128 -> unsigned */ \
153195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmul.s16   q9, " #QR ", q10               \n"  /* V  = R * q10 (VR)    */ \
153295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q9, " #QG ", q14               \n"  /* V -= G * q14 (VG)    */ \
153395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmls.s16   q9, " #QB ", q13               \n"  /* V -= B * q13 (VB)    */ \
1534522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  /* V += q15: +128 -> unsigned */ \
1535522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  /* d0 = saturating narrow of U >> 8 (16 bit to 8 bit U) */ \
1536522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  /* d1 = saturating narrow of V >> 8 (16 bit to 8 bit V) */
1537522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// ARGBToUVRow_NEON: reads 16 ARGB pixels from each of two rows (src_argb and src_argb + src_stride_argb) per loop, sums each 2x2 block per channel, and stores 8 U bytes to dst_u and 8 V bytes to dst_v. NOTE(review): the loop runs at least once and steps pix by 16; presumably callers pass pix as a positive multiple of 16 - confirm.
153806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
1539dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.comvoid ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
1540dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
154176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  asm volatile (
1542dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_argb + src_stride_argb (second row).
154306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient (halved: inputs are 2x average)
154406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
154506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
154606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
154706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
154876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1549c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop label to 4 bytes.
155076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  "1:                                          \n"
1551dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 ARGB pixels.
1552dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 ARGB pixels.
1553dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B: pairwise add, 16 bytes -> 8 shorts.
1554dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G: pairwise add, 16 bytes -> 8 shorts.
1555dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R: pairwise add, 16 bytes -> 8 shorts.
1556dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 ARGB pixels.
1557dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 ARGB pixels.
1558dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // B: accumulate row 2 pairs -> 8 2x2 sums.
1559dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G: accumulate row 2 pairs -> 8 2x2 sums.
1560dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // R: accumulate row 2 pairs -> 8 2x2 sums.
156106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
156206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // rounding halve of 4-sample sum = 2x average
156306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
156406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
156506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
1566dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per row (16x2 block) processed per loop.
156795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q0, q1, q2)  // B = q0, G = q1, R = q2 -> U in d0, V in d1.
1568dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
1569dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
157076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0 (flags from subs).
157176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  : "+r"(src_argb),  // %0
1572dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(src_stride_argb),  // %1 (holds the second-row address after the add)
1573dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(dst_u),     // %2
1574dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(dst_v),     // %3
1575dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com    "+r"(pix)        // %4
157676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  :
15778f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
157876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
157976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com  );
158076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com}
158195730719503137a7db61a105bec02220f9ed159efbarchard@google.com// ARGBToUVJRow_NEON: same two-row 16x2 -> 8x1 U/V computation as ARGBToUVRow_NEON, with a different coefficient set (127/84/43/20/107, each halved). NOTE(review): presumably the full-range JPEG variant - confirm.
1582050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// TODO(fbarchard): Subsample match C code.
1583050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.comvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
1584050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com                       uint8* dst_u, uint8* dst_v, int pix) {
1585050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  asm volatile (
1586050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_argb + src_stride_argb (second row).
158706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #127 / 2                  \n"  // UB / VR 0.500 coefficient (halved: inputs are 2x average)
158806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #84 / 2                   \n"  // UG -0.33126 coefficient
158906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #43 / 2                   \n"  // UR -0.16874 coefficient
159006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #20 / 2                   \n"  // VB -0.08131 coefficient
159106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #107 / 2                  \n"  // VG -0.41869 coefficient
1592050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1593c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop label to 4 bytes.
1594050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  "1:                                          \n"
1595050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 ARGB pixels.
1596050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 ARGB pixels.
1597050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B: pairwise add, 16 bytes -> 8 shorts.
1598050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G: pairwise add, 16 bytes -> 8 shorts.
1599050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R: pairwise add, 16 bytes -> 8 shorts.
1600050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 ARGB pixels.
1601050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 ARGB pixels.
1602050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // B: accumulate row 2 pairs -> 8 2x2 sums.
1603050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G: accumulate row 2 pairs -> 8 2x2 sums.
1604050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // R: accumulate row 2 pairs -> 8 2x2 sums.
160506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
160606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // rounding halve of 4-sample sum = 2x average
160706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
160806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
160906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
1610050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per row (16x2 block) processed per loop.
1611050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    RGBTOUV(q0, q1, q2)  // B = q0, G = q1, R = q2 -> U in d0, V in d1.
1612050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
1613050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
1614050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0 (flags from subs).
1615050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  : "+r"(src_argb),  // %0
1616050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(src_stride_argb),  // %1 (holds the second-row address after the add)
1617050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(dst_u),     // %2
1618050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(dst_v),     // %3
1619050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "+r"(pix)        // %4
1620050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  :
1621050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1622050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1623050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com  );
1624050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com}
1625050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// BGRAToUVRow_NEON: reads 16 BGRA pixels from each of two rows (src_bgra and src_bgra + src_stride_bgra) per loop, sums each 2x2 block per channel, and stores 8 U bytes to dst_u and 8 V bytes to dst_v. After vld4 the channels used are B = q3, G = q2, R = q1.
162695730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
162795730719503137a7db61a105bec02220f9ed159efbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
162895730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
162995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_bgra + src_stride_bgra (second row).
163006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient (halved: inputs are 2x average)
163106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
163206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
163306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
163406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
163595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1636c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop label to 4 bytes.
163795730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
163895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 BGRA pixels.
163995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 BGRA pixels.
164095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q3, q3                         \n"  // B: pairwise add, 16 bytes -> 8 shorts.
164195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // G: pairwise add, 16 bytes -> 8 shorts.
164295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // R: pairwise add, 16 bytes -> 8 shorts.
164395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 BGRA pixels.
164495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 BGRA pixels.
164595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q3, q7                         \n"  // B: accumulate row 2 pairs -> 8 2x2 sums.
164695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // G: accumulate row 2 pairs -> 8 2x2 sums.
164795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // R: accumulate row 2 pairs -> 8 2x2 sums.
164806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
164906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"  // rounding halve of 4-sample sum = 2x average
165006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
165106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q3, q3, #1                     \n"
165206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
165395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per row (16x2 block) processed per loop.
165495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q3, q2, q1)  // B = q3, G = q2, R = q1 -> U in d0, V in d1.
165595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
165695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
165795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0 (flags from subs).
165895730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_bgra),  // %0
165995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_bgra),  // %1 (holds the second-row address after the add)
166095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
166195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
166295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
166395730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
16648f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
166595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
166695730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
166795730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
166895730719503137a7db61a105bec02220f9ed159efbarchard@google.com// ABGRToUVRow_NEON: reads 16 ABGR pixels from each of two rows (src_abgr and src_abgr + src_stride_abgr) per loop, sums each 2x2 block per channel, and stores 8 U bytes to dst_u and 8 V bytes to dst_v. After vld4 the channels used are B = q2, G = q1, R = q0.
166995730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
167095730719503137a7db61a105bec02220f9ed159efbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
167195730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
167295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_abgr + src_stride_abgr (second row).
167306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient (halved: inputs are 2x average)
167406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
167506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
167606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
167706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
167895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1679c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop label to 4 bytes.
168095730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
168195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 ABGR pixels.
168295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 ABGR pixels.
168395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // B: pairwise add, 16 bytes -> 8 shorts.
168495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G: pairwise add, 16 bytes -> 8 shorts.
168595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // R: pairwise add, 16 bytes -> 8 shorts.
168695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 ABGR pixels.
168795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 ABGR pixels.
168895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // B: accumulate row 2 pairs -> 8 2x2 sums.
168995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G: accumulate row 2 pairs -> 8 2x2 sums.
169095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // R: accumulate row 2 pairs -> 8 2x2 sums.
169106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
169206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // rounding halve of 4-sample sum = 2x average
169306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
169406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
169506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
169695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per row (16x2 block) processed per loop.
169795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q2, q1, q0)  // B = q2, G = q1, R = q0 -> U in d0, V in d1.
169895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
169995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
170095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0 (flags from subs).
170195730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_abgr),  // %0
170295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_abgr),  // %1 (holds the second-row address after the add)
170395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
170495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
170595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
170695730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
17078f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
170895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
170995730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
171095730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
171195730719503137a7db61a105bec02220f9ed159efbarchard@google.com// RGBAToUVRow_NEON: reads 16 RGBA pixels from each of two rows (src_rgba and src_rgba + src_stride_rgba) per loop, sums each 2x2 block per channel, and stores 8 U bytes to dst_u and 8 V bytes to dst_v. vld4 leaves B, G, R in q1, q2, q3; the pairwise adds move them down into q0, q1, q2.
171295730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
171395730719503137a7db61a105bec02220f9ed159efbarchard@google.com                      uint8* dst_u, uint8* dst_v, int pix) {
171495730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
171595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // %1 = src_rgba + src_stride_rgba (second row).
171606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient (halved: inputs are 2x average)
171706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
171806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
171906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
172006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
172195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
1722c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop label to 4 bytes.
172395730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
172495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // row 1: load 8 RGBA pixels.
172595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // row 1: load next 8 RGBA pixels.
172695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q1                         \n"  // B (q1): pairwise add, 16 bytes -> 8 shorts in q0.
172795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q2                         \n"  // G (q2): pairwise add, 16 bytes -> 8 shorts in q1.
172895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q3                         \n"  // R (q3): pairwise add, 16 bytes -> 8 shorts in q2.
172995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // row 2: load 8 RGBA pixels.
173095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // row 2: load next 8 RGBA pixels.
173195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q5                         \n"  // B: accumulate row 2 pairs (q5) -> 8 2x2 sums.
173295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q6                         \n"  // G: accumulate row 2 pairs (q6) -> 8 2x2 sums.
173395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q7                         \n"  // R: accumulate row 2 pairs (q7) -> 8 2x2 sums.
173406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
173506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // rounding halve of 4-sample sum = 2x average
173606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
173706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
173806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
173995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per row (16x2 block) processed per loop.
174095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q0, q1, q2)  // B = q0, G = q1, R = q2 -> U in d0, V in d1.
174195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
174295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
174395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"  // loop while pix > 0 (flags from subs).
174495730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_rgba),  // %0
174595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_rgba),  // %1 (holds the second-row address after the add)
174695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
174795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
174895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
174995730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
17508f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
175195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
175295730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
175395730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
175495730719503137a7db61a105bec02220f9ed159efbarchard@google.com
175595730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
175695730719503137a7db61a105bec02220f9ed159efbarchard@google.com                       uint8* dst_u, uint8* dst_v, int pix) {
175795730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
175895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // src_stride + src_rgb24
175906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
176006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
176106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
176206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
176306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
176495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1765c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
176695730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
176795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RGB24 pixels.
176895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RGB24 pixels.
176995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
177095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
177195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
177295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RGB24 pixels.
177395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RGB24 pixels.
177495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // B 16 bytes -> 8 shorts.
177595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
177695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // R 16 bytes -> 8 shorts.
177706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
177806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
177906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
178006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
178106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
178295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 32 processed per loop.
178395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q0, q1, q2)
178495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
178595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
178695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"
178795730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_rgb24),  // %0
178895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_rgb24),  // %1
178995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
179095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
179195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
179295730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
17938f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
179495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
179595730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
179695730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
179795730719503137a7db61a105bec02220f9ed159efbarchard@google.com
// Computes one row of subsampled U and V from two adjacent rows of packed
// 3-byte pixels.
//   src_raw         first source row.
//   src_stride_raw  byte offset from the first row to the second row; it is
//                   added to src_raw up front and the sum is then used as the
//                   second row pointer.
//   dst_u, dst_v    destinations; 8 bytes are written to each per iteration.
//   pix             pixel count per row; decremented by 16 per iteration.
// Each iteration de-interleaves 16 pixels from each row, sums every 2x2 block
// per channel, halves the sums with rounding and hands them to the RGBTOUV
// macro. RGBTOUV is defined elsewhere in this file; NOTE(review): presumably
// it consumes the q10-q15 constants set up below and leaves U in d0 and V in
// d1 - confirm against the macro definition.
// The loop body always runs at least once and always handles a full group of
// 16 pixels per row.
179895730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
179995730719503137a7db61a105bec02220f9ed159efbarchard@google.com                     uint8* dst_u, uint8* dst_v, int pix) {
180095730719503137a7db61a105bec02220f9ed159efbarchard@google.com  asm volatile (
180195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "add        %1, %0, %1                     \n"  // second row = src_raw + src_stride_raw
180206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
180306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
180406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
180506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
180606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
180795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1808c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
180995730719503137a7db61a105bec02220f9ed159efbarchard@google.com  "1:                                          \n"
181095730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RAW pixels.
181195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RAW pixels.
    // Horizontal pairwise add of the first row: q0 = byte 0, q1 = byte 1,
    // q2 = byte 2 of each pixel (labelled R, G, B by the comments below).
181295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
181395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
181495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
181595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RAW pixels.
181695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RAW pixels.
    // Pairwise add the second row and accumulate: each short is now a 2x2 sum.
181795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q2, q6                         \n"  // B 16 bytes -> 8 shorts.
181895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q1, q5                         \n"  // G 16 bytes -> 8 shorts.
181995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vpadal.u8  q0, q4                         \n"  // R 16 bytes -> 8 shorts.
182006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
182106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (coefficients above are halved to match)
182206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q1, q1, #1                     \n"
182306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q2, q2, #1                     \n"
182406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // The flags set by this subs are consumed by the bgt at the bottom.
182595730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 pixels per row (32 source pixels in total) per loop.
182695730719503137a7db61a105bec02220f9ed159efbarchard@google.com    RGBTOUV(q2, q1, q0)
182795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
182895730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
182995730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "bgt        1b                             \n"
183095730719503137a7db61a105bec02220f9ed159efbarchard@google.com  : "+r"(src_raw),  // %0
183195730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(src_stride_raw),  // %1
183295730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_u),     // %2
183395730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(dst_v),     // %3
183495730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "+r"(pix)        // %4
183595730719503137a7db61a105bec02220f9ed159efbarchard@google.com  :
18368f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
183795730719503137a7db61a105bec02220f9ed159efbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
183895730719503137a7db61a105bec02220f9ed159efbarchard@google.com  );
183995730719503137a7db61a105bec02220f9ed159efbarchard@google.com}
184095730719503137a7db61a105bec02220f9ed159efbarchard@google.com
1841f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of RGB565 pixels per row. e.g. 16.
// Converts two adjacent rows of RGB565 into one row of subsampled U and V.
//   src_rgb565         first source row.
//   src_stride_rgb565  byte offset to the second row; it is added to
//                      src_rgb565 up front and the sum is then used as the
//                      second row pointer.
//   dst_u, dst_v       destinations; 8 bytes are written to each per iteration.
//   pix                decremented by 16 per iteration.
// Per iteration, 16 pixels are read from each row (8 at a time) and expanded
// by the RGB565TOARGB macro, which is defined elsewhere in this file; per the
// comments below it leaves B in d0, G in d1 and R in d2 - TODO confirm against
// the macro. The 2x2 sums are gathered in q4 (B), q5 (G) and q6 (R), halved
// with rounding (so they hold twice the 2x2 average; the coefficients in
// q10-q14 are pre-halved to compensate), and then
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// The loop body always runs at least once and always handles a full group of
// 16 pixels per row.
1842f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.comvoid RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
1843f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com                        uint8* dst_u, uint8* dst_v, int pix) {
1844f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  asm volatile (
1845f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "add        %1, %0, %1                     \n"  // second row = src_rgb565 + src_stride_rgb565
184606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
184706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
184806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
184906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
185006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
1851f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1852c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1853f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  "1:                                          \n"
    // First row, pixels 0-7: pairwise sums go to the low halves of q4/q5/q6.
1854f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
1855f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
1856f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
1857f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
1858f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
    // First row, pixels 8-15: pairwise sums go to the high halves.
1859f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // next 8 RGB565 pixels.
1860f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
1861f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
1862f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
1863f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
1864f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com
    // Second row: pairwise sums are accumulated onto the first row's sums.
1865f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // load 8 RGB565 pixels.
1866f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
1867f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
1868f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
1869f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
1870f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // next 8 RGB565 pixels.
1871f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    RGB565TOARGB
1872f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
1873f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
1874f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
1875f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com
187606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
187706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q5, q5, #1                     \n"
187806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q6, q6, #1                     \n"
187906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // The flags set by this subs are consumed by the bgt at the bottom.
1880f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
1881f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmul.s16   q8, q4, q10                    \n"  // B
1882f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q8, q5, q11                    \n"  // G
1883f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q8, q6, q12                    \n"  // R
1884f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
1885f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmul.s16   q9, q6, q10                    \n"  // R
1886f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q9, q5, q14                    \n"  // G
1887f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vmls.s16   q9, q4, q13                    \n"  // B
1888f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
1889f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
1890f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
1891f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
1892f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
1893f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "bgt        1b                             \n"
1894f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  : "+r"(src_rgb565),  // %0
1895f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(src_stride_rgb565),  // %1
1896f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(dst_u),     // %2
1897f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(dst_v),     // %3
1898f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "+r"(pix)        // %4
1899f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  :
19008f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1901f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1902f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com  );
1903f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com}
1904522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
1905522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of ARGB1555 pixels per row. e.g. 16.
// Converts two adjacent rows of ARGB1555 into one row of subsampled U and V.
//   src_argb1555         first source row.
//   src_stride_argb1555  byte offset to the second row; it is added to
//                        src_argb1555 up front and the sum is then used as
//                        the second row pointer.
//   dst_u, dst_v         destinations; 8 bytes are written to each per
//                        iteration.
//   pix                  decremented by 16 per iteration.
// Per iteration, 16 pixels are read from each row (8 at a time) and expanded
// by the RGB555TOARGB macro, which is defined elsewhere in this file; per the
// comments below it leaves B in d0, G in d1 and R in d2 - TODO confirm against
// the macro. The 2x2 sums are gathered in q4 (B), q5 (G) and q6 (R), halved
// with rounding (so they hold twice the 2x2 average; the coefficients in
// q10-q14 are pre-halved to compensate), and then
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// The loop body always runs at least once and always handles a full group of
// 16 pixels per row.
1906522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
1907522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com                        uint8* dst_u, uint8* dst_v, int pix) {
1908522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  asm volatile (
1909522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "add        %1, %0, %1                     \n"  // second row = src_argb1555 + src_stride_argb1555
191006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
191106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
191206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
191306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
191406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
1915522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1916c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1917522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  "1:                                          \n"
    // First row, pixels 0-7: pairwise sums go to the low halves of q4/q5/q6.
1918522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
1919522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
1920522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
1921522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
1922522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
    // First row, pixels 8-15: pairwise sums go to the high halves.
1923522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB1555 pixels.
1924522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
1925522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
1926522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
1927522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
1928522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
    // Second row: pairwise sums are accumulated onto the first row's sums.
1929522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB1555 pixels.
1930522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
1931522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
1932522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
1933522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
1934522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB1555 pixels.
1935522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    RGB555TOARGB
1936522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
1937522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
1938522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
1939522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
194006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
194106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q5, q5, #1                     \n"
194206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q6, q6, #1                     \n"
194306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // The flags set by this subs are consumed by the bgt at the bottom.
1944522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
1945522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q8, q4, q10                    \n"  // B
1946522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q5, q11                    \n"  // G
1947522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q6, q12                    \n"  // R
1948522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
1949522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q9, q6, q10                    \n"  // R
1950522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q5, q14                    \n"  // G
1951522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q4, q13                    \n"  // B
1952522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
1953522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
1954522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
1955522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
1956522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
1957522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "bgt        1b                             \n"
1958522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  : "+r"(src_argb1555),  // %0
1959522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(src_stride_argb1555),  // %1
1960522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_u),     // %2
1961522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_v),     // %3
1962522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(pix)        // %4
1963522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  :
19648f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1965522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1966522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  );
1967522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com}
1968522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
1969522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// 16x2 pixels -> 8x1.  pix is number of ARGB4444 pixels per row. e.g. 16.
// Converts two adjacent rows of ARGB4444 into one row of subsampled U and V.
//   src_argb4444         first source row.
//   src_stride_argb4444  byte offset to the second row; it is added to
//                        src_argb4444 up front and the sum is then used as
//                        the second row pointer.
//   dst_u, dst_v         destinations; 8 bytes are written to each per
//                        iteration.
//   pix                  decremented by 16 per iteration.
// Per iteration, 16 pixels are read from each row (8 at a time) and expanded
// by the ARGB4444TOARGB macro, which is defined elsewhere in this file; per
// the comments below it leaves B in d0, G in d1 and R in d2 - TODO confirm
// against the macro. The 2x2 sums are gathered in q4 (B), q5 (G) and q6 (R),
// halved with rounding (so they hold twice the 2x2 average; the coefficients
// in q10-q14 are pre-halved to compensate), and then
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// The loop body always runs at least once and always handles a full group of
// 16 pixels per row.
1970522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
1971522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com                          uint8* dst_u, uint8* dst_v, int pix) {
1972522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  asm volatile (
1973522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "add        %1, %0, %1                     \n"  // second row = src_argb4444 + src_stride_argb4444
197406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
197506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
197606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
197706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
197806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
1979522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmov.u16   q15, #0x8080                   \n"  // 128.5
1980c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
1981522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  "1:                                          \n"
    // First row, pixels 0-7: pairwise sums go to the low halves of q4/q5/q6.
1982522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
1983522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
1984522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
1985522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
1986522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
    // First row, pixels 8-15: pairwise sums go to the high halves.
1987522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB4444 pixels.
1988522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
1989522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
1990522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
1991522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
1992522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
    // Second row: pairwise sums are accumulated onto the first row's sums.
1993522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB4444 pixels.
1994522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
1995522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
1996522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
1997522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
1998522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB4444 pixels.
1999522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    ARGB4444TOARGB
2000522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
2001522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
2002522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vpadal.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
2003522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com
200406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
200506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q5, q5, #1                     \n"
200606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com    "vrshr.u16  q6, q6, #1                     \n"
200706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com
    // The flags set by this subs are consumed by the bgt at the bottom.
2008522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
2009522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q8, q4, q10                    \n"  // B
2010522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q5, q11                    \n"  // G
2011522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q8, q6, q12                    \n"  // R
2012522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
2013522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmul.s16   q9, q6, q10                    \n"  // R
2014522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q5, q14                    \n"  // G
2015522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vmls.s16   q9, q4, q13                    \n"  // B
2016522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
2017522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
2018522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
2019522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
2020522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
2021522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "bgt        1b                             \n"
2022522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  : "+r"(src_argb4444),  // %0
2023522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(src_stride_argb4444),  // %1
2024522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_u),     // %2
2025522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(dst_v),     // %3
2026522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "+r"(pix)        // %4
2027522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  :
20288f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
2029522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
2030522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com  );
2031522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com}
2032f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com
// Converts a row of RGB565 pixels to 8-bit luma.
//   src_rgb565  source row; 16 bytes (8 pixels) are read per iteration.
//   dst_y       destination; 8 bytes are written per iteration.
//   pix         pixel count; decremented by 8 per iteration.
// Each group of 8 pixels is expanded by the RGB565TOARGB macro, which is
// defined elsewhere in this file; the comments below treat d0/d1/d2 as B/G/R
// afterwards - TODO confirm against the macro. Luma is then
//   Y = sat8((13 * B + 65 * G + 33 * R + 64) >> 7) + 16
// where the final +16 is a saturating 8-bit add.
// The loop body always runs at least once and always handles a full group of
// 8 pixels.
2033bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
2034bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2035bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
2036bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
2037bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
2038bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
2039c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2040bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
20411dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
    // The flags set by this subs are consumed by the bgt at the bottom.
2042bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2043bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    RGB565TOARGB
    // Widening multiply-accumulate: q2 = 13 * B + 65 * G + 33 * R (16-bit lanes).
2044bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
2045bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
2046bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
2047bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y (rounding, saturating)
2048bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d27                        \n"  // saturating add of the 16 constant
2049bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2050bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2051bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgb565),  // %0
2052bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),       // %1
2053bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)          // %2
2054bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
20558f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
2056bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2057bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2058bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of ARGB1555 pixels to 8-bit luma (Y).
// Each loop iteration loads 16 bytes (8 pixels), expands them with the
// ARGB1555TOARGB macro (defined elsewhere in this file; presumably it leaves
// B, G, R in d0, d1, d2 - confirm against the macro) and stores 8 Y bytes
// computed as ((13 * B + 65 * G + 33 * R) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
20591dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
20601dee6250936424ced8722329369da75935d61580fbarchard@google.com  asm volatile (
20611dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
20621dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
20631dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
20641dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
2065c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
20661dee6250936424ced8722329369da75935d61580fbarchard@google.com  "1:                                          \n"
20671dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
20681dee6250936424ced8722329369da75935d61580fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
20691dee6250936424ced8722329369da75935d61580fbarchard@google.com    ARGB1555TOARGB
20701dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
20711dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
20721dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
20731dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d27.
20741dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
20751dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
20761dee6250936424ced8722329369da75935d61580fbarchard@google.com    "bgt        1b                             \n"
20771dee6250936424ced8722329369da75935d61580fbarchard@google.com  : "+r"(src_argb1555),  // %0
20781dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(dst_y),         // %1
20791dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(pix)            // %2
20801dee6250936424ced8722329369da75935d61580fbarchard@google.com  :
20818f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
20821dee6250936424ced8722329369da75935d61580fbarchard@google.com  );
20831dee6250936424ced8722329369da75935d61580fbarchard@google.com}
20841dee6250936424ced8722329369da75935d61580fbarchard@google.com
// Converts a row of ARGB4444 pixels to 8-bit luma (Y).
// Each loop iteration loads 16 bytes (8 pixels), expands them with the
// ARGB4444TOARGB macro (defined elsewhere in this file; presumably it leaves
// B, G, R in d0, d1, d2 - confirm against the macro) and stores 8 Y bytes
// computed as ((13 * B + 65 * G + 33 * R) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
20851dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
20861dee6250936424ced8722329369da75935d61580fbarchard@google.com  asm volatile (
20871dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
20881dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
20891dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
20901dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmov.u8    d27, #16                       \n"  // Add 16 constant
2091c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
20921dee6250936424ced8722329369da75935d61580fbarchard@google.com  "1:                                          \n"
20931dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
20941dee6250936424ced8722329369da75935d61580fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
20951dee6250936424ced8722329369da75935d61580fbarchard@google.com    ARGB4444TOARGB
20961dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
20971dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
20981dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
20991dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d27.
21001dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vqadd.u8   d0, d27                        \n"
21011dee6250936424ced8722329369da75935d61580fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
21021dee6250936424ced8722329369da75935d61580fbarchard@google.com    "bgt        1b                             \n"
21031dee6250936424ced8722329369da75935d61580fbarchard@google.com  : "+r"(src_argb4444),  // %0
21041dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(dst_y),         // %1
21051dee6250936424ced8722329369da75935d61580fbarchard@google.com    "+r"(pix)            // %2
21061dee6250936424ced8722329369da75935d61580fbarchard@google.com  :
21078f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
21081dee6250936424ced8722329369da75935d61580fbarchard@google.com  );
21091dee6250936424ced8722329369da75935d61580fbarchard@google.com}
2110bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 32-bit BGRA pixels to 8-bit luma (Y).
// vld4.8 de-interleaves the four bytes of each of 8 pixels into d0..d3.
// Bytes 1, 2 and 3 of each pixel (d1, d2, d3) are weighted with the R, G and
// B coefficients; byte 0 (d0) is not used and is overwritten by the result.
// Y = ((33 * R + 65 * G + 13 * B) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
2111bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
2112bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2113bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
2114bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2115bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
2116bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2117c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2118bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
2119bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of BGRA.
2120bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2121bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d1, d4                     \n"  // R
2122bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d5                     \n"  // G
2123bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d3, d6                     \n"  // B
2124bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d7.
2125bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
2126bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2127bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2128bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_bgra),  // %0
2129bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),     // %1
2130bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2131bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
21328f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2133bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2134bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2135bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 32-bit ABGR pixels to 8-bit luma (Y).
// vld4.8 de-interleaves the four bytes of each of 8 pixels into d0..d3.
// Bytes 0, 1 and 2 of each pixel (d0, d1, d2) are weighted with the R, G and
// B coefficients; byte 3 (d3) is not used.
// Y = ((33 * R + 65 * G + 13 * B) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
2136bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
2137bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2138bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
2139bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2140bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
2141bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2142c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2143bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
2144bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ABGR.
2145bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2146bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d0, d4                     \n"  // R
2147bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d1, d5                     \n"  // G
2148bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d6                     \n"  // B
2149bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d7.
2150bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
2151bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2152bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2153bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_abgr),  // %0
2154bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),  // %1
2155bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2156bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
21578f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2158bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2159bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2160bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 32-bit RGBA pixels to 8-bit luma (Y).
// vld4.8 de-interleaves the four bytes of each of 8 pixels into d0..d3.
// Bytes 1, 2 and 3 of each pixel (d1, d2, d3) are weighted with the B, G and
// R coefficients; byte 0 (d0) is not used and is overwritten by the result.
// Y = ((13 * B + 65 * G + 33 * R) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
2161bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
2162bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
21630908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
21640908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
21650908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
21660908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2167c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
21680908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  "1:                                          \n"
2169bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of RGBA.
2170bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2171bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d1, d4                     \n"  // B
2172bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d5                     \n"  // G
2173bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d3, d6                     \n"  // R
2174bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d7.
2175bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
2176bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2177bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2178bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgba),  // %0
2179bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),  // %1
2180bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2181bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
21828f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2183bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2184bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
2185bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 24-bit RGB24 pixels to 8-bit luma (Y).
// vld3.8 de-interleaves the three bytes of each of 8 pixels into d0..d2.
// Bytes 0, 1 and 2 of each pixel (d0, d1, d2) are weighted with the B, G and
// R coefficients.
// Y = ((13 * B + 65 * G + 33 * R) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
2186bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
2187bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2188bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
2189bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2190bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
2191bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2192c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2193bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
2194bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RGB24.
21950908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
21960908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmull.u8   q8, d0, d4                     \n"  // B
21970908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmlal.u8   q8, d1, d5                     \n"  // G
21980908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vmlal.u8   q8, d2, d6                     \n"  // R
21990908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d7.
22000908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
22010908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
22020908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "bgt        1b                             \n"
2203bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_rgb24),  // %0
22040908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "+r"(dst_y),  // %1
22050908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com    "+r"(pix)        // %2
22060908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  :
22078f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
22080908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com  );
22090908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com}
2210bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com
// Converts a row of 24-bit RAW pixels to 8-bit luma (Y).
// vld3.8 de-interleaves the three bytes of each of 8 pixels into d0..d2.
// Bytes 0, 1 and 2 of each pixel (d0, d1, d2) are weighted with the R, G and
// B coefficients, i.e. the channel order is the reverse of RGB24ToYRow_NEON.
// Y = ((33 * R + 65 * G + 13 * B) rounded >> 7) + 16, saturated.
// The remaining count is tested after the loop body, so at least 8 pixels
// are always processed and pix is consumed in steps of 8.
2211bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
2212bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  asm volatile (
2213bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
2214bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
2215bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
2216bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmov.u8    d7, #16                        \n"  // Add 16 constant
2217c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2218bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  "1:                                          \n"
2219bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RAW.
2220bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2221bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmull.u8   q8, d0, d4                     \n"  // R
2222bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d1, d5                     \n"  // G
2223bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vmlal.u8   q8, d2, d6                     \n"  // B
2224bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y
    // Saturating add of the +16 offset held in d7.
2225bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vqadd.u8   d0, d7                         \n"
2226bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
2227bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "bgt        1b                             \n"
2228bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  : "+r"(src_raw),  // %0
2229bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(dst_y),  // %1
2230bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com    "+r"(pix)        // %2
2231bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  :
22328f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2233bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com  );
2234bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com}
22350908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com
2236b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1
// Blends the row at src_ptr with the row at src_ptr + src_stride and writes
// the result to dst_ptr, 16 bytes per loop iteration.
// source_y_fraction is the weight of the second row out of 256:
//   0     - the first row is copied unchanged (the second row is never read).
//   64    - 75% first row / 25% second row, using two rounding half-adds.
//   128   - 50% / 50%, using one rounding half-add.
//   192   - 25% first row / 75% second row, using two rounding half-adds.
//   other - (first * (256 - fraction) + second * fraction + 128) >> 8.
// Only the low 8 bits of the weights reach the NEON registers (vdup.8), so
// this assumes 0 <= source_y_fraction <= 255 - TODO confirm with callers.
// Each loop tests the remaining count after its body, so at least 16 bytes
// are always processed and dst_width is consumed in steps of 16.
2237b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_NEON(uint8* dst_ptr,
2238b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com                         const uint8* src_ptr, ptrdiff_t src_stride,
2239b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com                         int dst_width, int source_y_fraction) {
2240b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  asm volatile (
    // Dispatch on the fraction; 0 is handled before the second row's
    // address is computed.
22418f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #0                         \n"
22428f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        100f                           \n"
    // %2 becomes the address of the second row (src_stride + src_ptr).
22438f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "add        %2, %1                         \n"
22448f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #64                        \n"
22458f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        75f                            \n"
22468f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #128                       \n"
22478f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        50f                            \n"
22488f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "cmp        %4, #192                       \n"
22498f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "beq        25f                            \n"
22508f506332af217882648eed166a257557855b9fdbfbarchard@google.com
    // d5 = fraction (weight of the second row),
    // d4 = 256 - fraction (weight of the first row).
22518f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vdup.8     d5, %4                         \n"
22528f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "rsb        %4, #256                       \n"
22538f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vdup.8     d4, %4                         \n"
2254b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // General purpose row blend.
2255b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "1:                                          \n"
22562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"
22572c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%2]!                    \n"
2258b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
22598f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmull.u8   q13, d0, d4                    \n"
22608f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmull.u8   q14, d1, d4                    \n"
22618f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmlal.u8   q13, d2, d5                    \n"
22628f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmlal.u8   q14, d3, d5                    \n"
    // Rounding narrow of the 16-bit sums back to 8 bits (>> 8).
22638f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrshrn.u16 d0, q13, #8                    \n"
22648f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrshrn.u16 d1, q14, #8                    \n"
22652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
22668f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        1b                             \n"
22678f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2268b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2269b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 25 / 75.
    // q0 = first row, q1 = second row; averaging with q1 twice gives
    // 25% first row + 75% second row.
2270b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "25:                                         \n"
22712c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"
22722c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%2]!                    \n"
2273b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
22748f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"
22758f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"
22762c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
22778f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        25b                            \n"
22788f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2279b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2280b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 50 / 50.
2281b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "50:                                         \n"
22822c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"
22832c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%2]!                    \n"
2284b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
22858f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"
22862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
22878f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        50b                            \n"
22888f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2289b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2290b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 75 / 25.
    // Same as the 25 / 75 case with the rows swapped: q1 = first row,
    // q0 = second row, giving 75% first row + 25% second row.
2291b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "75:                                         \n"
22922c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"
22932c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%2]!                    \n"
2294b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
22958f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"
22968f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vrhadd.u8  q0, q1                         \n"
22972c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
22988f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        75b                            \n"
22998f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "b          99f                            \n"
2300b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
2301b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    // Blend 100 / 0 - Copy row unchanged.
2302b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "100:                                        \n"
23032c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%1]!                    \n"
2304b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com    "subs       %3, %3, #16                    \n"
23052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%0]!                    \n"
23068f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bgt        100b                           \n"
2307b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com
    // Common exit for all blend variants.
2308b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  "99:                                         \n"
2309b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  : "+r"(dst_ptr),          // %0
2310b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(src_ptr),          // %1
2311b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(src_stride),       // %2
2312b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(dst_width),        // %3
2313b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com    "+r"(source_y_fraction) // %4
2314b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  :
23158f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
2316b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com  );
2317b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com}
23188f506332af217882648eed166a257557855b9fdbfbarchard@google.com
23198f506332af217882648eed166a257557855b9fdbfbarchard@google.com// Blend src_argb0 (s) over src_argb1 (d), storing alpha 255: dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
23208f506332af217882648eed166a257557855b9fdbfbarchard@google.comvoid ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
23218f506332af217882648eed166a257557855b9fdbfbarchard@google.com                       uint8* dst_argb, int width) {
23228f506332af217882648eed166a257557855b9fdbfbarchard@google.com  asm volatile (
23238f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "subs       %3, #8                         \n"  // width -= 8.
23248f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "blt        89f                            \n"  // fewer than 8 pixels: go to the 1 pixel loop.
23258f506332af217882648eed166a257557855b9fdbfbarchard@google.com    // Blend 8 pixels per iteration.
23268f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "8:                                          \n"
23278f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB0 (d3 = sa).
23288f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 pixels of ARGB1.
23298f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
2330d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q10, d4, d3                    \n"  // db * sa (16 bit)
2331d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q11, d5, d3                    \n"  // dg * sa (16 bit)
2332d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q12, d6, d3                    \n"  // dr * sa (16 bit)
2333d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d20, q10, #8                  \n"  // db * sa >> 8, rounded, narrowed to 8 bit
2334d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d21, q11, #8                  \n"  // dg * sa >> 8, rounded, narrowed to 8 bit
2335d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d22, q12, #8                  \n"  // dr * sa >> 8, rounded, narrowed to 8 bit
2336d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256 (q2 = d4:d5, q10 = d20:d21)
2337d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
2338d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   q0, q0, q2                     \n"  // + sbg (saturating)
2339d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   d2, d2, d6                     \n"  // + sr (saturating)
23408f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmov.u8    d3, #255                       \n"  // a = 255
23418f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 pixels of ARGB.
23428f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bge        8b                             \n"  // loop while at least 8 pixels remain.
23438f506332af217882648eed166a257557855b9fdbfbarchard@google.com
23448f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "89:                                         \n"
23458f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "adds       %3, #8-1                       \n"  // width = remaining pixels - 1.
23468f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "blt        99f                            \n"  // nothing left: done.
23478f506332af217882648eed166a257557855b9fdbfbarchard@google.com
23488f506332af217882648eed166a257557855b9fdbfbarchard@google.com    // Blend 1 pixel per iteration; only lane 0 is loaded and stored.
23498f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "1:                                          \n"
23508f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n"  // load 1 pixel ARGB0.
23518f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vld4.8     {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n"  // load 1 pixel ARGB1.
23528f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "subs       %3, %3, #1                     \n"  // 1 processed per loop.
2353d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q10, d4, d3                    \n"  // db * sa (16 bit)
2354d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q11, d5, d3                    \n"  // dg * sa (16 bit)
2355d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vmull.u8   q12, d6, d3                    \n"  // dr * sa (16 bit)
2356d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d20, q10, #8                  \n"  // db * sa >> 8, rounded, narrowed to 8 bit
2357d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d21, q11, #8                  \n"  // dg * sa >> 8, rounded, narrowed to 8 bit
2358d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqrshrn.u16 d22, q12, #8                  \n"  // dr * sa >> 8, rounded, narrowed to 8 bit
2359d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
2360d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
2361d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   q0, q0, q2                     \n"  // + sbg (saturating)
2362d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com    "vqadd.u8   d2, d2, d6                     \n"  // + sr (saturating)
23638f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vmov.u8    d3, #255                       \n"  // a = 255
23648f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "vst4.8     {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n"  // store 1 pixel.
23658f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "bge        1b                             \n"  // loop until the remainder is consumed.
23668f506332af217882648eed166a257557855b9fdbfbarchard@google.com
23678f506332af217882648eed166a257557855b9fdbfbarchard@google.com  "99:                                         \n"
23688f506332af217882648eed166a257557855b9fdbfbarchard@google.com
23698f506332af217882648eed166a257557855b9fdbfbarchard@google.com  : "+r"(src_argb0),    // %0
23708f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "+r"(src_argb1),    // %1
23718f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "+r"(dst_argb),     // %2
23728f506332af217882648eed166a257557855b9fdbfbarchard@google.com    "+r"(width)         // %3
23738f506332af217882648eed166a257557855b9fdbfbarchard@google.com  :
2374d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
23758f506332af217882648eed166a257557855b9fdbfbarchard@google.com  );
23768f506332af217882648eed166a257557855b9fdbfbarchard@google.com}
23778f506332af217882648eed166a257557855b9fdbfbarchard@google.com
23781d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com// Attenuate 8 pixels at a time: B, G and R are each scaled by the pixel's own alpha; alpha is stored unchanged.
23791d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.comvoid ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
23801d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  asm volatile (
23811d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    // Attenuate 8 pixels per iteration.
23821d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  "1:                                          \n"
23831d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB (d3 = a).
23841d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
23851d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vmull.u8   q10, d0, d3                    \n"  // b * a (16 bit)
23861d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vmull.u8   q11, d1, d3                    \n"  // g * a (16 bit)
23871d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vmull.u8   q12, d2, d3                    \n"  // r * a (16 bit)
23881d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vqrshrn.u16 d0, q10, #8                   \n"  // b = b * a >> 8, rounded
23891d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vqrshrn.u16 d1, q11, #8                   \n"  // g = g * a >> 8, rounded
23901d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vqrshrn.u16 d2, q12, #8                   \n"  // r = r * a >> 8, rounded
23911d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
23921d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "bgt        1b                             \n"  // loop while pixels remain; the body runs at least once.
23931d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  : "+r"(src_argb),   // %0
23941d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "+r"(dst_argb),   // %1
23951d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com    "+r"(width)       // %2
23961d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  :
23971d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
23981d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com  );
23991d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com}
24001d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com
2401ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// Quantize 8 ARGB pixels (32 bytes) per iteration, in place; alpha is stored back unchanged.
2402ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// For each of B, G, R: dst = (dst * scale >> 16) * interval_size + interval_offset;
2403ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.comvoid ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
2404ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com                          int interval_offset, int width) {
2405ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  asm volatile (
2406ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vdup.u16   q8, %2                         \n"  // scale in every 16 bit lane.
2407ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vshr.u16   q8, q8, #1                     \n"  // scale >>= 1 (vqdmulh below doubles the product)
2408ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vdup.u16   q9, %3                         \n"  // interval multiply.
2409ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vdup.u16   q10, %4                        \n"  // interval add
2410ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com
2411ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    // 8 pixel loop.
2412ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    ".p2align   2                              \n"
2413ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  "1:                                          \n"
2414ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]         \n"  // load 8 pixels of ARGB (pointer not advanced).
2415ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
2416ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmovl.u8   q0, d0                         \n"  // b (0 .. 255)
2417ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmovl.u8   q1, d2                         \n"  // g (0 .. 255)
2418ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmovl.u8   q2, d4                         \n"  // r (0 .. 255); alpha stays in d6.
2419ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqdmulh.s16 q0, q0, q8                    \n"  // b * scale (high half of doubled product)
2420ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqdmulh.s16 q1, q1, q8                    \n"  // g
2421ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqdmulh.s16 q2, q2, q8                    \n"  // r
2422ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmul.u16   q0, q0, q9                     \n"  // b * interval_size
2423ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmul.u16   q1, q1, q9                     \n"  // g
2424ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vmul.u16   q2, q2, q9                     \n"  // r
2425ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vadd.u16   q0, q0, q10                    \n"  // b + interval_offset
2426ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vadd.u16   q1, q1, q10                    \n"  // g
2427ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vadd.u16   q2, q2, q10                    \n"  // r
2428ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqmovn.u16 d0, q0                         \n"  // b saturated to 8 bit
2429ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqmovn.u16 d2, q1                         \n"  // g saturated to 8 bit
2430ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vqmovn.u16 d4, q2                         \n"  // r saturated to 8 bit
2431ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "vst4.8     {d0, d2, d4, d6}, [%0]!        \n"  // store 8 pixels of ARGB and advance.
2432ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "bgt        1b                             \n"  // loop while pixels remain; the body runs at least once.
2433ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  : "+r"(dst_argb),       // %0
2434ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "+r"(width)           // %1
2435ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  : "r"(scale),           // %2
2436ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "r"(interval_size),   // %3
2437ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com    "r"(interval_offset)  // %4
2438b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
2439b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  );
2440b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com}
2441b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com
2442b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// Shade 8 pixels at a time: each of B, G, R and A is scaled by its own byte of value.
2443b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
2444fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
2445b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.comvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
2446b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com                       uint32 value) {
2447b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  asm volatile (
2448b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vdup.u32   q0, %3                         \n"  // duplicate scale value.
2449fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vzip.u8    d0, d1                         \n"  // d0 aarrggbb (each byte doubled into a 16 bit lane).
2450fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vshr.u16   q0, q0, #1                     \n"  // scale / 2 (vqrdmulh below doubles the product).
2451b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com
2452b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    // 8 pixel loop.
2453b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    ".p2align   2                              \n"
2454b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  "1:                                          \n"
2455b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vld4.8     {d20, d22, d24, d26}, [%0]!    \n"  // load 8 pixels of ARGB.
2456b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
2457b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q10, d20                       \n"  // b (0 .. 255)
2458b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q11, d22                       \n"  // g (0 .. 255)
2459b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q12, d24                       \n"  // r (0 .. 255)
2460b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vmovl.u8   q13, d26                       \n"  // a (0 .. 255)
2461578c88a9f7114b3ede887b3c6d9a11d8d06b043bfbarchard@google.com    "vqrdmulh.s16 q10, q10, d0[0]              \n"  // b * scale * 2
2462fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vqrdmulh.s16 q11, q11, d0[1]              \n"  // g
2463fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vqrdmulh.s16 q12, q12, d0[2]              \n"  // r
2464fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com    "vqrdmulh.s16 q13, q13, d0[3]              \n"  // a
2465b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d20, q10                       \n"  // b saturated to 8 bit
2466b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d22, q11                       \n"  // g saturated to 8 bit
2467b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d24, q12                       \n"  // r saturated to 8 bit
2468b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vqmovn.u16 d26, q13                       \n"  // a saturated to 8 bit
2469b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "vst4.8     {d20, d22, d24, d26}, [%1]!    \n"  // store 8 pixels of ARGB.
2470b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "bgt        1b                             \n"  // loop while pixels remain; the body runs at least once.
2471b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "+r"(src_argb),       // %0
2472b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "+r"(dst_argb),       // %1
2473b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com    "+r"(width)           // %2
2474b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "r"(value)            // %3
2475b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com  : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
2476ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com  );
2477ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com}
2478ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com
247982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels; alpha is copied through.
2480050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// Similar to ARGBToYJ but stores ARGB.
2481050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
248282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.comvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
248382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  asm volatile (
2484050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
2485050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
2486050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
2487c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
248882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  "1:                                          \n"
248982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
249082375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
249182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmull.u8   q2, d0, d24                    \n"  // B
249282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmlal.u8   q2, d1, d25                    \n"  // G
249382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmlal.u8   q2, d2, d26                    \n"  // R
2494050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com    "vqrshrun.s16 d0, q2, #7                   \n"  // 15 bit to 8 bit B (rounded >> 7)
249582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmov       d1, d0                         \n"  // G
249682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vmov       d2, d0                         \n"  // R
249782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 ARGB pixels.
249882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "bgt        1b                             \n"  // loop while pixels remain; the body runs at least once.
249982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  : "+r"(src_argb),  // %0
250082375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "+r"(dst_argb),  // %1
250182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com    "+r"(width)      // %2
250282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  :
250382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
250482375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com  );
250582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com}
250682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com
2507c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels, in place; alpha is stored back unchanged.
2508c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com//    b = (r * 35 + g * 68 + b * 17) >> 7
2509c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com//    g = (r * 45 + g * 88 + b * 22) >> 7
2510c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com//    r = (r * 50 + g * 98 + b * 24) >> 7
2511c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.comvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
2512c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  asm volatile (
2513c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d20, #17                       \n"  // BB coefficient
2514c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d21, #68                       \n"  // BG coefficient
2515c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d22, #35                       \n"  // BR coefficient
2516c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d24, #22                       \n"  // GB coefficient
2517c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d25, #88                       \n"  // GG coefficient
2518c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d26, #45                       \n"  // GR coefficient
2519c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d28, #24                       \n"  // RB coefficient
2520c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d29, #98                       \n"  // RG coefficient
2521c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmov.u8    d30, #50                       \n"  // RR coefficient
2522c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
2523c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  "1:                                          \n"
2524c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]         \n"  // load 8 ARGB pixels (pointer not advanced).
2525c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
2526c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmull.u8   q2, d0, d20                    \n"  // B to Sepia B
2527c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q2, d1, d21                    \n"  // G
2528c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q2, d2, d22                    \n"  // R
2529c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmull.u8   q3, d0, d24                    \n"  // B to Sepia G
2530c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q3, d1, d25                    \n"  // G
2531c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q3, d2, d26                    \n"  // R
2532c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmull.u8   q8, d0, d28                    \n"  // B to Sepia R
2533c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q8, d1, d29                    \n"  // G
2534c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vmlal.u8   q8, d2, d30                    \n"  // R
253587adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com    "vqshrn.u16 d0, q2, #7                     \n"  // 16 bit to 8 bit B (>> 7, saturated)
253687adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com    "vqshrn.u16 d1, q3, #7                     \n"  // 16 bit to 8 bit G (>> 7, saturated)
253787adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com    "vqshrn.u16 d2, q8, #7                     \n"  // 16 bit to 8 bit R (>> 7, saturated)
2538c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%0]!        \n"  // store 8 ARGB pixels and advance.
2539c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "bgt        1b                             \n"  // loop while pixels remain; the body runs at least once.
2540c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  : "+r"(dst_argb),  // %0
2541c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "+r"(width)      // %1
2542c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  :
2543c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q8",  // q8 is the Sepia R accumulator.
2544c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com    "q10", "q11", "q12", "q13", "q14", "q15"
2545c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com  );
2546c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com}
2547c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com
254862154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com// Transform 8 ARGB pixels (32 bytes) with color matrix: each output channel is a saturated (b, g, r, a) dot product >> 6.
2549c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// TODO(fbarchard): Was same as Sepia except matrix is provided.  This function
2550c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// needs to saturate.  Consider doing a non-saturating version.
2551c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.comvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
2552c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com                             const int8* matrix_argb, int width) {
255362154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com  asm volatile (
2554c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vld1.8     {q2}, [%3]                     \n"  // load 16 int8 coefficients (4 rows of 4).
255562154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com    "vmovl.s8   q0, d4                         \n"  // B,G coefficients s16 (d0 = B row, d1 = G row).
2556c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmovl.s8   q1, d5                         \n"  // R,A coefficients s16 (d2 = R row, d3 = A row).
255762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com
2558c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
255962154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com  "1:                                          \n"
2560c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vld4.8     {d16, d18, d20, d22}, [%0]!    \n"  // load 8 ARGB pixels.
2561c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
25620cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q8, d16                        \n"  // b (0 .. 255) 16 bit
25630cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q9, d18                        \n"  // g
25640cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q10, d20                       \n"  // r
25650cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmovl.u8   q11, d22                       \n"  // a (kept in q11; q15 is the A accumulator)
25660cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q12, q8, d0[0]                 \n"  // B = B * Matrix B
25670cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q13, q8, d1[0]                 \n"  // G = B * Matrix G
25680cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q14, q8, d2[0]                 \n"  // R = B * Matrix R
2569c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q15, q8, d3[0]                 \n"  // A = B * Matrix A
25700cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q4, q9, d0[1]                  \n"  // B += G * Matrix B
25710cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q5, q9, d1[1]                  \n"  // G += G * Matrix G
25720cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q6, q9, d2[1]                  \n"  // R += G * Matrix R
2573c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q7, q9, d3[1]                  \n"  // A += G * Matrix A
25740cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
25750cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
25760cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
2577c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
25780cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q4, q10, d0[2]                 \n"  // B += R * Matrix B
25790cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q5, q10, d1[2]                 \n"  // G += R * Matrix G
25800cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q6, q10, d2[2]                 \n"  // R += R * Matrix R
2581c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q7, q10, d3[2]                 \n"  // A += R * Matrix A
25820cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
25830cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
25840cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
2585c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
25860cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q4, q11, d0[3]                 \n"  // B += A * Matrix B
25870cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q5, q11, d1[3]                 \n"  // G += A * Matrix G
25880cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vmul.s16   q6, q11, d2[3]                 \n"  // R += A * Matrix R
2589c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vmul.s16   q7, q11, d3[3]                 \n"  // A += A * Matrix A
25900cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
25910cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
25920cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
2593c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
2594c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d16, q12, #6                  \n"  // 16 bit to 8 bit B (>> 6, saturated unsigned)
2595c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d18, q13, #6                  \n"  // 16 bit to 8 bit G (>> 6, saturated unsigned)
2596c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d20, q14, #6                  \n"  // 16 bit to 8 bit R (>> 6, saturated unsigned)
2597c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vqshrun.s16 d22, q15, #6                  \n"  // 16 bit to 8 bit A (>> 6, saturated unsigned)
2598c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "vst4.8     {d16, d18, d20, d22}, [%1]!    \n"  // store 8 ARGB pixels.
2599c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "bgt        1b                             \n"  // loop while pixels remain; the body runs at least once.
2600c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com  : "+r"(src_argb),   // %0
2601c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "+r"(dst_argb),   // %1
2602c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com    "+r"(width)       // %2
2603c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com  : "r"(matrix_argb)  // %3
2604c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
26050cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com    "q10", "q11", "q12", "q13", "q14", "q15"
260662154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com  );
260762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com}
260862154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com
2609512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
// NOTE(review): the body below narrows with vrshrn, not vqshrun, so this TODO
// may be stale -- confirm before removing it.
2610512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com#ifdef HAS_ARGBMULTIPLYROW_NEON
26115b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
// Every channel byte of the result is (a * b + 128) >> 8: an 8x8 -> 16 bit
// widening multiply followed by a rounding narrowing shift by 8.
//   src_argb0, src_argb1: the two input rows (4 bytes per pixel).
//   dst_argb:             output row (4 bytes per pixel).
//   width:                pixel count.  The loop handles 8 pixels per pass and
//                         always runs at least once, so width is presumably a
//                         positive multiple of 8 -- TODO confirm with callers.
26125b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.comvoid ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
26135b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com                          uint8* dst_argb, int width) {
26145b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  asm volatile (
26155b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    // 8 pixel loop.
2616c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
26175b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  "1:                                          \n"
26185b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels, de-interleaved into even d regs.
26195b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%1]!        \n"  // load 8 more ARGB pixels into the odd d regs.
26205b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop; sets flags for bgt.
26215b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q0, d0, d1                     \n"  // multiply B (q0 overwrites its own inputs d0/d1)
26225b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q1, d2, d3                     \n"  // multiply G
26235b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q2, d4, d5                     \n"  // multiply R
26245b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vmull.u8   q3, d6, d7                     \n"  // multiply A
26256a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d0, q0, #8                     \n"  // 16 bit to 8 bit B (rounded)
26266a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d1, q1, #8                     \n"  // 16 bit to 8 bit G (q0 already consumed, d1 is free)
26276a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d2, q2, #8                     \n"  // 16 bit to 8 bit R
26286a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com    "vrshrn.u16 d3, q3, #8                     \n"  // 16 bit to 8 bit A
26295b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels, re-interleaved.
26305b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
26315b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
26325b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  : "+r"(src_argb0),  // %0
26335b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(src_argb1),  // %1
26345b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(dst_argb),   // %2
26355b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(width)       // %3
26365b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  :
26375b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // flags, memory and d0-d7 are modified.
26385b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  );
26395b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com}
2640512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com#endif  // HAS_ARGBMULTIPLYROW_NEON
26415b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
26425b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com// Add 2 rows of ARGB pixels together, 8 pixels at a time.
// Each channel byte is added with unsigned saturation (sums above 255 clamp
// to 255).
//   src_argb0, src_argb1: the two input rows (4 bytes per pixel).
//   dst_argb:             output row (4 bytes per pixel).
//   width:                pixel count.  The loop handles 8 pixels per pass and
//                         always runs at least once, so width is presumably a
//                         positive multiple of 8 -- TODO confirm with callers.
26435b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.comvoid ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
26445b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com                     uint8* dst_argb, int width) {
26455b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  asm volatile (
26465b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    // 8 pixel loop.
2647c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
26485b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  "1:                                          \n"
26495b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels: d0=B d1=G d2=R d3=A.
26505b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels: d4=B d5=G d6=R d7=A.
26515b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop; sets flags for bgt.
26525b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vqadd.u8   q0, q0, q2                     \n"  // saturating add B, G (q0=d0:d1, q2=d4:d5)
26535b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vqadd.u8   q1, q1, q3                     \n"  // saturating add R, A (q1=d2:d3, q3=d6:d7)
26545b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels, re-interleaved.
26555b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
26565b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
26575b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  : "+r"(src_argb0),  // %0
26585b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(src_argb1),  // %1
26595b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(dst_argb),   // %2
26605b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com    "+r"(width)       // %3
26615b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  :
2662573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // flags, memory and d0-d7 are modified.
2663573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  );
2664573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com}
2665573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com
2666573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
// Computes src_argb0 - src_argb1 per channel byte with unsigned saturation
// (results below 0 clamp to 0).
//   src_argb0: minuend row (4 bytes per pixel).
//   src_argb1: subtrahend row (4 bytes per pixel).
//   dst_argb:  output row (4 bytes per pixel).
//   width:     pixel count.  The loop handles 8 pixels per pass and always
//              runs at least once, so width is presumably a positive multiple
//              of 8 -- TODO confirm with callers.
2667573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.comvoid ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
2668573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com                          uint8* dst_argb, int width) {
2669573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  asm volatile (
2670573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    // 8 pixel loop.
2671c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
2672573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  "1:                                          \n"
2673573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels: d0=B d1=G d2=R d3=A.
2674573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 more ARGB pixels: d4=B d5=G d6=R d7=A.
2675573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop; sets flags for bgt.
2676573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vqsub.u8   q0, q0, q2                     \n"  // saturating subtract B, G (q0=d0:d1, q2=d4:d5)
2677573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vqsub.u8   q1, q1, q3                     \n"  // saturating subtract R, A (q1=d2:d3, q3=d6:d7)
2678573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels, re-interleaved.
2679573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
2680573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com
2681573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  : "+r"(src_argb0),  // %0
2682573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "+r"(src_argb1),  // %1
2683573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "+r"(dst_argb),   // %2
2684573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com    "+r"(width)       // %3
2685573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  :
2686573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com  : "cc", "memory", "q0", "q1", "q2", "q3"  // flags, memory and d0-d7 are modified.
26875b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com  );
26885b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com}
26895b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com
2690c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
// Sobel is the unsigned saturating sum of the two inputs (clamped to 255) and
// is replicated into B, G and R; alpha is a constant 255.
2691c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// A = 255
2692c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// R = Sobel
2693c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// G = Sobel
2694c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// B = Sobel
//   src_sobelx, src_sobely: input rows, 1 byte per pixel.
//   dst_argb:               output row, 4 bytes per pixel.
//   width:                  pixel count.  The loop handles 8 pixels per pass
//                           and always runs at least once, so width is
//                           presumably a positive multiple of 8 -- TODO confirm.
2695c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
2696c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                     uint8* dst_argb, int width) {
2697c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
2698c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d3, #255                       \n"  // alpha, set once; d3 is never overwritten in the loop.
2699c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    // 8 pixel loop.
2700c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
2701c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
2702c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d0}, [%0]!                    \n"  // load 8 sobelx.
2703c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d1}, [%1]!                    \n"  // load 8 sobely.
2704c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop; sets flags for bgt.
2705c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqadd.u8   d0, d0, d1                     \n"  // saturating add -> B
2706c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d1, d0                         \n"  // copy to G
2707c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d2, d0                         \n"  // copy to R
2708c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels, interleaved B,G,R,A.
2709c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
2710c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_sobelx),  // %0
2711c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_sobely),  // %1
27128be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(dst_argb),    // %2
27138be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(width)        // %3
27148be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  :
27158be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  : "cc", "memory", "q0", "q1"  // flags, memory and d0-d3 are modified.
27168be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  );
27178be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com}
27188be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com
27198be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into plane.
// The sum is an unsigned saturating add (clamped to 255), 1 byte per pixel.
//   src_sobelx, src_sobely: input rows, 1 byte per pixel.
//   dst_y:                  output row, 1 byte per pixel.
//   width:                  pixel count.  The loop handles 16 pixels per pass
//                           and always runs at least once, so width is
//                           presumably a positive multiple of 16 -- TODO confirm.
27208be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.comvoid SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
27218be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com                          uint8* dst_y, int width) {
27228be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  asm volatile (
27238be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    // 16 pixel loop.
2724c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
27258be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  "1:                                          \n"
27268be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"  // load 16 sobelx.
27278be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vld1.8     {q1}, [%1]!                    \n"  // load 16 sobely.
27288be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop; sets flags for bgt.
27298be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vqadd.u8   q0, q0, q1                     \n"  // saturating add
27308be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"  // store 16 pixels.
27318be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
27328be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com  : "+r"(src_sobelx),  // %0
27338be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(src_sobely),  // %1
27348be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(dst_y),       // %2
27358be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com    "+r"(width)        // %3
2736c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  :
2737c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"  // flags, memory, q0 and q1 are modified.
2738c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
2739c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
2740c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com
2741c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// Mixes Sobel X, Sobel Y and Sobel into ARGB.
// Sobel is the unsigned saturating sum of Sobel X and Sobel Y (clamped to 255).
2742c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// A = 255
2743c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// R = Sobel X
2744c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// G = Sobel
2745c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// B = Sobel Y
//   src_sobelx, src_sobely: input rows, 1 byte per pixel.
//   dst_argb:               output row, 4 bytes per pixel.
//   width:                  pixel count.  The loop handles 8 pixels per pass
//                           and always runs at least once, so width is
//                           presumably a positive multiple of 8 -- TODO confirm.
2746c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
2747c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                     uint8* dst_argb, int width) {
2748c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
2749c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vmov.u8    d3, #255                       \n"  // alpha, set once; d3 is never overwritten in the loop.
2750c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    // 8 pixel loop.
2751c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
2752c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
2753c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d2}, [%0]!                    \n"  // load 8 sobelx straight into the R lane.
2754c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vld1.8     {d0}, [%1]!                    \n"  // load 8 sobely straight into the B lane.
2755c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop; sets flags for bgt.
2756c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqadd.u8   d1, d0, d2                     \n"  // saturating add -> G
2757c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 ARGB pixels, interleaved B,G,R,A.
2758c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
2759c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_sobelx),  // %0
2760c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_sobely),  // %1
2761ff4a84168d731c33ce7dedcfb497376a8669cecafbarchard@google.com    "+r"(dst_argb),    // %2
2762ff4a84168d731c33ce7dedcfb497376a8669cecafbarchard@google.com    "+r"(width)        // %3
2763c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  :
2764c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"  // flags, memory and d0-d3 are modified.
2765c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
2766c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
2767c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com
2768c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// SobelX as a matrix is
2769c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1  0  1
2770c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -2  0  2
2771c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1  0  1
// For each output i this computes
//   | (y0[i] - y0[i+2]) + 2 * (y1[i] - y1[i+2]) + (y2[i] - y2[i+2]) |
// clamped to 255.  The differences are taken left-minus-right, i.e. negated
// relative to the matrix above; the absolute value makes the sign irrelevant.
//   src_y0, src_y1, src_y2: the three input rows, 1 byte per pixel.  Each pass
//                           reads bytes [i, i+10) of every row, i.e. 2 bytes
//                           beyond the 8 pixels being produced.
//   dst_sobelx:             output row, 1 byte per pixel.
//   width:                  pixel count.  The loop handles 8 pixels per pass
//                           and always runs at least once, so width is
//                           presumably a positive multiple of 8 -- TODO confirm.
2772c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
2773c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                    const uint8* src_y2, uint8* dst_sobelx, int width) {
2774c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
2775c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
2776c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
27772c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0],%5                  \n"  // top: load y0[i..i+7], then advance 2
27782c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d1}, [%0],%6                  \n"  // load y0[i+2..i+9], then advance 6 (8 total)
2779c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q0, d0, d1                     \n"  // widen to 16 bit: y0[i] - y0[i+2]
27802c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%1],%5                  \n"  // center * 2
27812c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%1],%6                  \n"  // center row, shifted by 2 columns
2782c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"  // y1[i] - y1[i+2]
2783c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"  // add the center difference once...
2784c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"  // ...and again, giving it weight 2
27852c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%2],%5                  \n"  // bottom
27862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%2],%6                  \n"  // bottom row, shifted by 2 columns
2787c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %4, %4, #8                     \n"  // 8 pixels; sets flags for bgt.
2788c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"  // y2[i] - y2[i+2]
2789c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"  // accumulate bottom difference
2790c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vabs.s16   q0, q0                         \n"  // absolute value of the signed sum
2791c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqmovn.u16 d0, q0                         \n"  // saturating narrow 16 bit to 8 bit
27922c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%3]!                    \n"  // store 8 sobelx
2793c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
2794c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_y0),      // %0
2795c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_y1),      // %1
2796c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_y2),      // %2
2797c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(dst_sobelx),  // %3
2798c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(width)        // %4
2799c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "r"(2),            // %5: first post-increment (column offset between the two loads)
2800c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "r"(6)             // %6: second post-increment; 2 + 6 = 8 bytes per pass
2801c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
2802c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
2803c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
2804c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com
2805c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// SobelY as a matrix is
2806c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1 -2 -1
2807c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com//  0  0  0
2808c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com//  1  2  1
// For each output i this computes
//   | (y0[i] - y1[i]) + 2 * (y0[i+1] - y1[i+1]) + (y0[i+2] - y1[i+2]) |
// clamped to 255.  The differences are taken y0-minus-y1, i.e. negated
// relative to the matrix above; the absolute value makes the sign irrelevant.
//   src_y0, src_y1: the two input rows, 1 byte per pixel (presumably the rows
//                   above and below the output row, matching the matrix's zero
//                   middle row -- TODO confirm with callers).  Each pass reads
//                   bytes [i, i+10) of both rows, i.e. 2 bytes beyond the 8
//                   pixels being produced.
//   dst_sobely:     output row, 1 byte per pixel.
//   width:          pixel count.  The loop handles 8 pixels per pass and
//                   always runs at least once, so width is presumably a
//                   positive multiple of 8 -- TODO confirm.
2809c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
2810c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com                    uint8* dst_sobely, int width) {
2811c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  asm volatile (
2812c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"  // align loop to 4 bytes.
2813c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  "1:                                          \n"
28142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d0}, [%0],%4                  \n"  // left: load y0[i..i+7], then advance 1
28152c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d1}, [%1],%4                  \n"  // load y1[i..i+7], then advance 1
2816c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q0, d0, d1                     \n"  // widen to 16 bit: y0[i] - y1[i]
28172c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%0],%4                  \n"  // center * 2
28182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%1],%4                  \n"  // center column of y1
2819c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"  // y0[i+1] - y1[i+1]
2820c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"  // add the center difference once...
2821c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"  // ...and again, giving it weight 2
28222c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d2}, [%0],%5                  \n"  // right
28232c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {d3}, [%1],%5                  \n"  // right column of y1; advance 6 (8 total)
2824c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 pixels; sets flags for bgt.
2825c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vsubl.u8   q1, d2, d3                     \n"  // y0[i+2] - y1[i+2]
2826c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vadd.s16   q0, q0, q1                     \n"  // accumulate right difference
2827c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vabs.s16   q0, q0                         \n"  // absolute value of the signed sum
2828c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "vqmovn.u16 d0, q0                         \n"  // saturating narrow 16 bit to 8 bit
28292c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {d0}, [%2]!                    \n"  // store 8 sobely
2830c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "bgt        1b                             \n"  // repeat while remaining width > 0.
2831c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "+r"(src_y0),      // %0
2832c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(src_y1),      // %1
2833c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(dst_sobely),  // %2
2834c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "+r"(width)        // %3
2835c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "r"(1),            // %4: post-increment after the left and center loads
2836c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com    "r"(6)             // %5: post-increment after the right load; 1 + 1 + 6 = 8 bytes per pass
2837c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  : "cc", "memory", "q0", "q1"  // Clobber List
2838c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com  );
2839c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com}
284019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com#endif  // __ARM_NEON__
28412d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com
2842fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus
2843fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com}  // extern "C"
2844fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com}  // namespace libyuv
2845fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif
2846