/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
1141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include "libyuv/row.h"
1241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
1341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef __cplusplus
1441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgnamespace libyuv {
1541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgextern "C" {
1641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
1741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
1841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// This module is for GCC Neon
1941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
2041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Read 8 Y, 4 U and 4 V from 422.
// Asm operands (as bound by the row functions in this file): %0 = Y pointer,
// %1 = U pointer, %2 = V pointer. Each pointer is post-incremented past the
// bytes it supplied (8, 4 and 4 bytes respectively).
// Result: d0 = Y0..Y7; d2 = U0..U3 in its low 32 bits and V0..V3 in its high
// 32 bits, which is the chroma layout YUV422TORGB consumes.
#define READYUV422                                                             \
    MEMACCESS(0)                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    MEMACCESS(1)                                                               \
    "vld1.32    {d2[0]}, [%1]!                 \n"                             \
    MEMACCESS(2)                                                               \
    "vld1.32    {d2[1]}, [%2]!                 \n"

// Read 8 Y, 2 U and 2 V from 411.
// Asm operands: %0 = Y pointer (advanced by 8), %1 = U pointer and
// %2 = V pointer (each advanced by 2).
// The two U bytes land in 16-bit lane 0 of d2 and the two V bytes in lane 1.
// Copying d2 to d3 and zipping the pair byte-wise duplicates every sample, so
// d2 ends up as U0 U0 U1 U1 V0 V0 V1 V1 -- the same 4 U + 4 V layout that
// READYUV422 produces. d3 is clobbered as scratch.
#define READYUV411                                                             \
    MEMACCESS(0)                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    MEMACCESS(1)                                                               \
    "vld1.16    {d2[0]}, [%1]!                 \n"                             \
    MEMACCESS(2)                                                               \
    "vld1.16    {d2[1]}, [%2]!                 \n"                             \
    "vmov.u8    d3, d2                         \n"/*  duplicate u/v samples  */\
    "vzip.u8    d2, d3                         \n"

// Read 8 Y, 8 U and 8 V from 444.
// Asm operands: %0 = Y pointer, %1 = U pointer, %2 = V pointer; each is
// advanced by 8 bytes.
// d2 receives the 8 U bytes and d3 the 8 V bytes (together q1). vpaddl.u8 adds
// each adjacent byte pair into a 16-bit lane and vrshrn.u16 #1 halves the sums
// with rounding while narrowing back into d2, leaving 4 averaged U followed by
// 4 averaged V -- the layout YUV422TORGB consumes. d3 is clobbered.
#define READYUV444                                                             \
    MEMACCESS(0)                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    MEMACCESS(1)                                                               \
    "vld1.8     {d2}, [%1]!                    \n"                             \
    MEMACCESS(2)                                                               \
    "vld1.8     {d3}, [%2]!                    \n"                             \
    "vpaddl.u8  q1, q1                         \n"/*  sum adjacent u/v pairs */\
    "vrshrn.u16 d2, q1, #1                     \n"

// Read 8 Y, and set 4 U and 4 V to 128.
// Asm operand: %0 = Y pointer, advanced by 8 bytes. No chroma is read; every
// byte of d2 is set to 128, the value that YUV422TORGB's "veor d2, d26" (with
// d26 = 128 as set up by the row functions) turns into zero.
#define READYUV400                                                             \
    MEMACCESS(0)                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    "vmov.u8    d2, #128                       \n"

// Read 8 Y and 4 UV from NV12.
// Asm operands: %0 = Y pointer, %1 = interleaved chroma pointer; both are
// advanced by 8 bytes.
// d2 is loaded with the 8 interleaved chroma bytes and copied to d3. vuzp.u8
// then gathers the even-indexed bytes (U) into d2 and the odd-indexed bytes
// (V) into d3, and vtrn.u32 swaps the middle 32-bit words so that d2 holds
// U0..U3 followed by V0..V3. d3 is clobbered as scratch.
#define READNV12                                                               \
    MEMACCESS(0)                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    MEMACCESS(1)                                                               \
    "vld1.8     {d2}, [%1]!                    \n"                             \
    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"

// Read 8 Y and 4 VU from NV21.
// Asm operands: %0 = Y pointer, %1 = interleaved chroma pointer; both are
// advanced by 8 bytes.
// Identical to READNV12 except that the vuzp.u8 operands are swapped: the
// even-indexed chroma bytes (V) go to d3 and the odd-indexed bytes (U) to d2,
// so after vtrn.u32 d2 again holds U0..U3 followed by V0..V3. d3 is scratch.
#define READNV21                                                               \
    MEMACCESS(0)                                                               \
    "vld1.8     {d0}, [%0]!                    \n"                             \
    MEMACCESS(1)                                                               \
    "vld1.8     {d2}, [%1]!                    \n"                             \
    "vmov.u8    d3, d2                         \n"/* split odd/even uv apart */\
    "vuzp.u8    d3, d2                         \n"                             \
    "vtrn.u32   d2, d3                         \n"

// Read 8 YUY2 pixels (16 bytes).
// Asm operand: %0 = packed source pointer, advanced by 16 bytes.
// vld2.8 de-interleaves the 16 bytes: even-indexed bytes (luma) go to d0 and
// odd-indexed bytes (alternating chroma) go to d2. The chroma bytes are then
// split exactly as in READNV12, leaving d2 = U0..U3 followed by V0..V3.
// d3 is clobbered as scratch.
#define READYUY2                                                               \
    MEMACCESS(0)                                                               \
    "vld2.8     {d0, d2}, [%0]!                \n"                             \
    "vmov.u8    d3, d2                         \n"                             \
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"

// Read 8 UYVY pixels (16 bytes).
// Asm operand: %0 = packed source pointer, advanced by 16 bytes.
// vld2.8 de-interleaves the 16 bytes: even-indexed bytes (alternating chroma)
// go to d2 and odd-indexed bytes (luma) go to d3. The luma is moved to d0,
// then the chroma bytes are split exactly as in READNV12, leaving
// d2 = U0..U3 followed by V0..V3. d3 is clobbered as scratch.
#define READUYVY                                                               \
    MEMACCESS(0)                                                               \
    "vld2.8     {d2, d3}, [%0]!                \n"                             \
    "vmov.u8    d0, d3                         \n"/*  luma bytes into d0     */\
    "vmov.u8    d3, d2                         \n"                             \
    "vuzp.u8    d2, d3                         \n"                             \
    "vtrn.u32   d2, d3                         \n"

// Convert 8 pixels from YUV to 8-bit B, G and R planes.
// Inputs:  d0 = 8 Y bytes; d2 = 4 U bytes followed by 4 V bytes;
//          d24 = U->B / V->R gains, d25 = U/V->G gains (signed bytes);
//          d26 = 128 in every byte; q14 = luma gain, q15 = luma offset
//          (16-bit lanes). The row functions load these before the loop.
// Outputs: d20 = B0..B7, d21 = G0..G7, d22 = R0..R7 in pixel order.
//          d23 is left zeroed by the final transpose; callers overwrite it
//          (or another register) with alpha before storing.
// Clobbers q0, q1, q8, q9, q10 and q11.
// Each chroma lane is shared by one even and one odd pixel, so luma is split
// into even (d0) and odd (d1) 16-bit lanes, and the results are re-interleaved
// at the end. Sums carry 6 fractional bits, removed by the saturating
// "vqshrun #6".
#define YUV422TORGB                                                            \
    "veor.u8    d2, d26                        \n"/*subtract 128 from u and v*/\
    "vmull.s8   q8, d2, d24                    \n"/*  u/v B/R component      */\
    "vmull.s8   q9, d2, d25                    \n"/*  u/v G component        */\
    "vmov.u8    d1, #0                         \n"/*  split odd/even y apart */\
    "vtrn.u8    d0, d1                         \n"                             \
    "vsub.s16   q0, q0, q15                    \n"/*  offset y               */\
    "vmul.s16   q0, q0, q14                    \n"/*  scale y                */\
    "vadd.s16   d18, d19                       \n"/*  sum u and v G terms    */\
    "vqadd.s16  d20, d0, d16                   \n" /* B */                     \
    "vqadd.s16  d21, d1, d16                   \n"                             \
    "vqadd.s16  d22, d0, d17                   \n" /* R */                     \
    "vqadd.s16  d23, d1, d17                   \n"                             \
    "vqadd.s16  d16, d0, d18                   \n" /* G */                     \
    "vqadd.s16  d17, d1, d18                   \n"                             \
    "vqshrun.s16 d0, q10, #6                   \n" /* B */                     \
    "vqshrun.s16 d1, q11, #6                   \n" /* R */                     \
    "vqshrun.s16 d2, q8, #6                    \n" /* G */                     \
    "vmovl.u8   q10, d0                        \n"/*  set up for reinterleave*/\
    "vmovl.u8   q11, d1                        \n"                             \
    "vmovl.u8   q8, d2                         \n"                             \
    "vtrn.u8    d20, d21                       \n"                             \
    "vtrn.u8    d22, d23                       \n"                             \
    "vtrn.u8    d16, d17                       \n"                             \
    "vmov.u8    d21, d16                       \n"

12141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic vec8 kUVToRB  = { 127, 127, 127, 127, 102, 102, 102, 102,
12241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         0, 0, 0, 0, 0, 0, 0, 0 };
12341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
12441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       0, 0, 0, 0, 0, 0, 0, 0 };
12541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_I444TOARGBROW_NEON
// Converts one row of I444 (separate Y, U and V planes, one chroma sample per
// pixel) to 32-bit pixels stored in memory as B, G, R, A with A fixed at 255.
//   src_y, src_u, src_v: source planes; 8 bytes are read from each per pass.
//   dst_argb:            destination; 32 bytes are written per pass.
//   width:               pixel count.
// The loop is bottom-tested and steps by 8 pixels, so it always runs at least
// once and effectively rounds width up to a multiple of 8; the buffers must
// tolerate those accesses.
// NOTE(review): the mnemonics and d/q register names are 32-bit ARM NEON
// syntax although the enclosing guard selects __aarch64__. Presumably
// HAS_I444TOARGBROW_NEON stays undefined for that target until this is
// ported -- confirm before enabling.
void I444ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    // Loop-invariant constants consumed by YUV422TORGB.
    MEMACCESS(5)
    "vld1.8     {d24}, [%5]                    \n"  // U->B, V->R gains
    MEMACCESS(6)
    "vld1.8     {d25}, [%6]                    \n"  // U->G, V->G gains
    "vmov.u8    d26, #128                      \n"  // chroma bias
    "vmov.u16   q14, #74                       \n"  // luma gain
    "vmov.u16   q15, #16                       \n"  // luma offset
    ".p2align   2                              \n"
  "1:                                          \n"
    READYUV444
    YUV422TORGB
    "subs       %4, %4, #8                     \n"  // 8 pixels consumed
    "vmov.u8    d23, #255                      \n"  // opaque alpha
    MEMACCESS(3)
    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"  // store B, G, R, A
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_u),     // %1
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
    : "r"(&kUVToRB),   // %5
      "r"(&kUVToG)     // %6
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_I444TOARGBROW_NEON

#ifdef HAS_I422TOARGBROW_NEON
// Converts one row of I422 (separate Y, U and V planes, one chroma sample per
// two pixels) to 32-bit pixels stored in memory as B, G, R, A with A fixed at
// 255.
//   src_y:        luma plane; 8 bytes are read per pass.
//   src_u, src_v: chroma planes; 4 bytes are read from each per pass.
//   dst_argb:     destination; 32 bytes are written per pass.
//   width:        pixel count.
// The loop is bottom-tested and steps by 8 pixels, so it always runs at least
// once and effectively rounds width up to a multiple of 8; the buffers must
// tolerate those accesses.
// NOTE(review): the mnemonics and d/q register names are 32-bit ARM NEON
// syntax although the enclosing guard selects __aarch64__. Presumably
// HAS_I422TOARGBROW_NEON stays undefined for that target until this is
// ported -- confirm before enabling.
void I422ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    // Loop-invariant constants consumed by YUV422TORGB.
    MEMACCESS(5)
    "vld1.8     {d24}, [%5]                    \n"  // U->B, V->R gains
    MEMACCESS(6)
    "vld1.8     {d25}, [%6]                    \n"  // U->G, V->G gains
    "vmov.u8    d26, #128                      \n"  // chroma bias
    "vmov.u16   q14, #74                       \n"  // luma gain
    "vmov.u16   q15, #16                       \n"  // luma offset
    ".p2align   2                              \n"
  "1:                                          \n"
    READYUV422
    YUV422TORGB
    "subs       %4, %4, #8                     \n"  // 8 pixels consumed
    "vmov.u8    d23, #255                      \n"  // opaque alpha
    MEMACCESS(3)
    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"  // store B, G, R, A
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_u),     // %1
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
    : "r"(&kUVToRB),   // %5
      "r"(&kUVToG)     // %6
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_I422TOARGBROW_NEON

#ifdef HAS_I411TOARGBROW_NEON
// Converts one row of I411 (separate Y, U and V planes, one chroma sample per
// four pixels) to 32-bit pixels stored in memory as B, G, R, A with A fixed
// at 255.
//   src_y:        luma plane; 8 bytes are read per pass.
//   src_u, src_v: chroma planes; 2 bytes are read from each per pass.
//   dst_argb:     destination; 32 bytes are written per pass.
//   width:        pixel count.
// The loop is bottom-tested and steps by 8 pixels, so it always runs at least
// once and effectively rounds width up to a multiple of 8; the buffers must
// tolerate those accesses.
// NOTE(review): the mnemonics and d/q register names are 32-bit ARM NEON
// syntax although the enclosing guard selects __aarch64__. Presumably
// HAS_I411TOARGBROW_NEON stays undefined for that target until this is
// ported -- confirm before enabling.
void I411ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    // Loop-invariant constants consumed by YUV422TORGB.
    MEMACCESS(5)
    "vld1.8     {d24}, [%5]                    \n"  // U->B, V->R gains
    MEMACCESS(6)
    "vld1.8     {d25}, [%6]                    \n"  // U->G, V->G gains
    "vmov.u8    d26, #128                      \n"  // chroma bias
    "vmov.u16   q14, #74                       \n"  // luma gain
    "vmov.u16   q15, #16                       \n"  // luma offset
    ".p2align   2                              \n"
  "1:                                          \n"
    READYUV411
    YUV422TORGB
    "subs       %4, %4, #8                     \n"  // 8 pixels consumed
    "vmov.u8    d23, #255                      \n"  // opaque alpha
    MEMACCESS(3)
    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"  // store B, G, R, A
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_u),     // %1
      "+r"(src_v),     // %2
      "+r"(dst_argb),  // %3
      "+r"(width)      // %4
    : "r"(&kUVToRB),   // %5
      "r"(&kUVToG)     // %6
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_I411TOARGBROW_NEON

#ifdef HAS_I422TOBGRAROW_NEON
// Converts one row of I422 (separate Y, U and V planes, one chroma sample per
// two pixels) to 32-bit pixels stored in memory as A, R, G, B with A fixed at
// 255.
//   src_y:        luma plane; 8 bytes are read per pass.
//   src_u, src_v: chroma planes; 4 bytes are read from each per pass.
//   dst_bgra:     destination; 32 bytes are written per pass.
//   width:        pixel count.
// YUV422TORGB leaves B in d20 and R in d22; they are swapped so that the
// consecutive registers d19..d22 hold A, R, G, B for the interleaving store.
// The loop is bottom-tested and steps by 8 pixels, so it always runs at least
// once and effectively rounds width up to a multiple of 8; the buffers must
// tolerate those accesses.
// NOTE(review): the mnemonics and d/q register names are 32-bit ARM NEON
// syntax although the enclosing guard selects __aarch64__. Presumably
// HAS_I422TOBGRAROW_NEON stays undefined for that target until this is
// ported -- confirm before enabling.
void I422ToBGRARow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_bgra,
                        int width) {
  asm volatile (
    // Loop-invariant constants consumed by YUV422TORGB.
    MEMACCESS(5)
    "vld1.8     {d24}, [%5]                    \n"  // U->B, V->R gains
    MEMACCESS(6)
    "vld1.8     {d25}, [%6]                    \n"  // U->G, V->G gains
    "vmov.u8    d26, #128                      \n"  // chroma bias
    "vmov.u16   q14, #74                       \n"  // luma gain
    "vmov.u16   q15, #16                       \n"  // luma offset
    ".p2align   2                              \n"
  "1:                                          \n"
    READYUV422
    YUV422TORGB
    "subs       %4, %4, #8                     \n"  // 8 pixels consumed
    "vswp.u8    d20, d22                       \n"  // d20 = R, d22 = B
    "vmov.u8    d19, #255                      \n"  // opaque alpha
    MEMACCESS(3)
    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"  // store A, R, G, B
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_u),     // %1
      "+r"(src_v),     // %2
      "+r"(dst_bgra),  // %3
      "+r"(width)      // %4
    : "r"(&kUVToRB),   // %5
      "r"(&kUVToG)     // %6
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_I422TOBGRAROW_NEON

27141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOABGRROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 4-byte
// pixels, 8 pixels per loop pass.
//
// Per the channel comments in ARGBTORGB565 further down this file, the
// conversion leaves B in d20, G in d21 and R in d22.  This variant swaps d20
// and d22 and fills d23 with 255, so each stored pixel is the byte sequence
// R, G, B, 255.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_abgr: output row; post-incremented by the vst4.8 (32 bytes per pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once and a width
//     that is not a multiple of 8 is effectively rounded up -- callers
//     presumably guarantee a multiple of 8 (TODO confirm).
//
// NOTE(review): the mnemonics and d/q register names are 32-bit ARM NEON
// syntax although this file is guarded by __aarch64__; presumably
// HAS_I422TOABGRROW_NEON is not defined for that target yet -- confirm.
27241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToABGRRow_NEON(const uint8* src_y,
27341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_u,
27441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_v,
27541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        uint8* dst_abgr,
27641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        int width) {
27741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
27841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
27941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
28041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
28141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
28241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
28341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
28441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
28541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
28641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
28741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
28841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
    // Sets the flags for the bgt below; the NEON instructions in between do
    // not touch them.
28941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
    // Exchange the B and R planes, then append an opaque alpha plane.
29041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vswp.u8    d20, d22                       \n"
29141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d23, #255                      \n"
29241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
29341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d20, d21, d22, d23}, [%3]!    \n"
29441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
29541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),     // %0
29641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),     // %1
29741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),     // %2
29841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_abgr),  // %3
29941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)      // %4
30041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),   // %5
30141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)     // %6
30241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
30341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
30441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
30541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
30641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TOABGRROW_NEON
30741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
30841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORGBAROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 4-byte
// pixels, 8 pixels per loop pass.
//
// Per the channel comments in ARGBTORGB565 further down this file, the
// conversion leaves B in d20, G in d21 and R in d22.  This variant fills d19
// with 255 and stores d19-d22 interleaved, so each stored pixel is the byte
// sequence 255, B, G, R.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_rgba: output row; post-incremented by the vst4.8 (32 bytes per pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
30941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRGBARow_NEON(const uint8* src_y,
31041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_u,
31141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_v,
31241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        uint8* dst_rgba,
31341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        int width) {
31441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
31541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
31641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
31741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
31841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
31941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
32041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
32141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
32241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
32341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
32441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
32541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
32641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
    // Alpha goes in d19, the register just below the colour planes, so it is
    // written as the first byte of every pixel.
32741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d19, #255                      \n"
32841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
32941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d19, d20, d21, d22}, [%3]!    \n"
33041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
33141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),     // %0
33241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),     // %1
33341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),     // %2
33441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_rgba),  // %3
33541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)      // %4
33641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),   // %5
33741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)     // %6
33841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
33941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
34041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
34141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
34241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TORGBAROW_NEON
34341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
34441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORGB24ROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 3-byte
// pixels, 8 pixels per loop pass.
//
// Per the channel comments in ARGBTORGB565 further down this file, the
// conversion leaves B in d20, G in d21 and R in d22.  They are stored
// interleaved without reordering, so each stored pixel is the byte sequence
// B, G, R.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_rgb24: output row; post-incremented by the vst3.8 (24 bytes per pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
34541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRGB24Row_NEON(const uint8* src_y,
34641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         const uint8* src_u,
34741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         const uint8* src_v,
34841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         uint8* dst_rgb24,
34941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         int width) {
35041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
35141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
35241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
35341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
35441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
35541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
35641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
35741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
35841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
35941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
36041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
36141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
36241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
    // 3-way interleaved store: no alpha plane is written.
36341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
36441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst3.8     {d20, d21, d22}, [%3]!         \n"
36541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
36641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),      // %0
36741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),      // %1
36841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),      // %2
36941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_rgb24),  // %3
37041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)       // %4
37141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),    // %5
37241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)      // %6
37341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
37441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
37541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
37641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
37741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TORGB24ROW_NEON
37841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
37941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORAWROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 3-byte
// pixels, 8 pixels per loop pass.
//
// Per the channel comments in ARGBTORGB565 further down this file, the
// conversion leaves B in d20, G in d21 and R in d22.  This variant swaps d20
// and d22 before the interleaved store, so each stored pixel is the byte
// sequence R, G, B.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_raw: output row; post-incremented by the vst3.8 (24 bytes per pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
38041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRAWRow_NEON(const uint8* src_y,
38141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       const uint8* src_u,
38241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       const uint8* src_v,
38341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       uint8* dst_raw,
38441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       int width) {
38541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
38641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
38741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
38841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
38941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
39041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
39141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
39241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
39341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
39441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
39541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
39641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
39741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
    // Exchange the B and R planes so R is written first.
39841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vswp.u8    d20, d22                       \n"
39941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
40041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst3.8     {d20, d21, d22}, [%3]!         \n"
40141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
40241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),    // %0
40341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),    // %1
40441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),    // %2
40541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_raw),  // %3
40641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)     // %4
40741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),  // %5
40841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)    // %6
40941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
41041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
41141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
41241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
41341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TORAWROW_NEON
41441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Asm fragment: packs 8 pixels held as 8-bit planes in d20 (B), d21 (G) and
// d22 (R) into eight 16-bit RGB565 values in q0:
//   B >> 3 in bits 0-4, G >> 2 in bits 5-10, R >> 3 in bits 11-15.
// q8, q9 and q10 are overwritten as scratch.  q10 aliases d20/d21 in the
// 32-bit NEON register file, so the widening of R into q10 has to stay after
// B and G have been widened out of d20/d21 into q8/q9.
41541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGBTORGB565                                                           \
41641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d20, d20, #3                   \n"  /* B                    */ \
41741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d21, d21, #2                   \n"  /* G                    */ \
41841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
41941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
42041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
42141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
42241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
42341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u16   q10, q10, #11                  \n"  /* R                    */ \
42441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
42541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       q0, q0, q10                    \n"  /* BGR                  */
42641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
42741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORGB565ROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 16-bit
// RGB565 pixels, 8 pixels per loop pass.
//
// After the YUV conversion, ARGBTORGB565 packs the B/G/R planes in d20-d22
// into eight 16-bit values in q0, which are then stored contiguously.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_rgb565: output row; post-incremented by the vst1.8 (16 bytes per pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
42841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRGB565Row_NEON(const uint8* src_y,
42941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          const uint8* src_u,
43041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          const uint8* src_v,
43141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          uint8* dst_rgb565,
43241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          int width) {
43341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
43441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
43541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
43641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
43741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
43841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
43941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
44041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
44141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
44241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
44341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
44441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
    // Sets the flags for the bgt below; the NEON packing and store in
    // between do not touch them.
44541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
44641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGBTORGB565
44741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
44841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels RGB565.
44941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
45041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),    // %0
45141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),    // %1
45241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),    // %2
45341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_rgb565),  // %3
45441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)     // %4
45541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),  // %5
45641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)    // %6
45741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
45841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
45941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
46041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
46141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TORGB565ROW_NEON
46241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Asm fragment: packs 8 pixels held as 8-bit planes in d20 (B), d21 (G),
// d22 (R) and d23 (A) into eight 16-bit ARGB1555 values in q0:
//   B >> 3 in bits 0-4, G >> 3 in bits 5-9, R >> 3 in bits 10-14,
//   A >> 7 in bit 15.
// The first shift operates on q10, i.e. on d20 and d21 together, so it
// reduces both B and G to 5 bits in one instruction.
// q8-q11 and q1 are overwritten as scratch.  q10 aliases d20/d21 and q11
// aliases d22/d23, so the order of the widening moves matters.
46341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGBTOARGB1555                                                         \
46441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q10, q10, #3                   \n"  /* B, G                 */ \
46541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d22, d22, #3                   \n"  /* R                    */ \
46641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d23, d23, #7                   \n"  /* A                    */ \
46741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q8, d20                        \n"  /* B                    */ \
46841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q9, d21                        \n"  /* G                    */ \
46941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q10, d22                       \n"  /* R                    */ \
47041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q11, d23                       \n"  /* A                    */ \
47141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u16   q9, q9, #5                     \n"  /* G                    */ \
47241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u16   q10, q10, #10                  \n"  /* R                    */ \
47341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u16   q11, q11, #15                  \n"  /* A                    */ \
47441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       q0, q8, q9                     \n"  /* BG                   */ \
47541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       q1, q10, q11                   \n"  /* RA                   */ \
47641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       q0, q0, q1                     \n"  /* BGRA                 */
47741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
47841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOARGB1555ROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 16-bit
// ARGB1555 pixels, 8 pixels per loop pass.
//
// After the YUV conversion, d23 is filled with 255 as the alpha plane and
// ARGBTOARGB1555 packs d20-d23 into eight 16-bit values in q0.  Because the
// alpha plane is constant 255, the packed alpha bit (A >> 7) is always 1.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_argb1555: output row; post-incremented by the vst1.8 (16 bytes per
//     pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
47941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToARGB1555Row_NEON(const uint8* src_y,
48041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            const uint8* src_u,
48141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            const uint8* src_v,
48241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            uint8* dst_argb1555,
48341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            int width) {
48441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
48541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
48641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
48741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
48841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
48941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
49041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
49141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
49241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
49341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
49441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
49541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
49641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
    // Alpha plane must be set every pass: the packing macro overwrites q11,
    // which contains d23.
49741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d23, #255                      \n"
49841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGBTOARGB1555
49941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
50041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB1555.
50141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
50241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),    // %0
50341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),    // %1
50441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),    // %2
50541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_argb1555),  // %3
50641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)     // %4
50741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),  // %5
50841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)    // %6
50941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
51041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
51141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
51241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
51341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TOARGB1555ROW_NEON
51441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Asm fragment: packs 8 pixels held as 8-bit planes in d20 (B), d21 (G),
// d22 (R) and d23 (A) into 16 bytes of ARGB4444 in q0 (d0/d1).
// B and R are shifted down so their top 4 bits land in the low nibble; G and
// A keep their top 4 bits in place by clearing the bits that are set in d4.
// d4 is NOT initialised here: the code using this macro must load the mask
// first (I422ToARGB4444Row_NEON loads 0x0f into every byte of d4).
// After the ORs, d0 holds the combined G/B bytes and d1 the combined A/R
// bytes; vzip.u8 interleaves them so each pixel becomes the G/B byte
// followed by the A/R byte.
51541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGBTOARGB4444                                                         \
51641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d20, d20, #4                   \n"  /* B                    */ \
51741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vbic.32    d21, d21, d4                   \n"  /* G                    */ \
51841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d22, d22, #4                   \n"  /* R                    */ \
51941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vbic.32    d23, d23, d4                   \n"  /* A                    */ \
52041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       d0, d20, d21                   \n"  /* BG                   */ \
52141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr       d1, d22, d23                   \n"  /* RA                   */ \
52241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vzip.u8    d0, d1                         \n"  /* BGRA                 */
52341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
52441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOARGB4444ROW_NEON
// Converts one row of planar YUV (separate Y, U and V pointers) into 16-bit
// ARGB4444 pixels, 8 pixels per loop pass.
//
// After the YUV conversion, d23 is filled with 255 as the alpha plane and
// ARGBTOARGB4444 packs d20-d23 into 16 bytes in q0.  The nibble mask the
// macro needs in d4 is loaded once before the loop.
//
// src_y, src_u, src_v: input planes, passed as read-write operands %0-%2;
//     presumably advanced by READYUV422, which is defined outside this view.
// dst_argb4444: output row; post-incremented by the vst1.8 (16 bytes per
//     pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
52541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToARGB4444Row_NEON(const uint8* src_y,
52641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            const uint8* src_u,
52741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            const uint8* src_v,
52841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            uint8* dst_argb4444,
52941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            int width) {
53041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
53141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(5)
53241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%5]                    \n"
53341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(6)
53441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%6]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // YUV422TORGB.
53541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
53641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
53741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
    // d4 is part of q2, which is listed in the clobbers below.
53841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
53941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
54041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
54141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV422
54241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
54341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #8                     \n"
    // Alpha plane must be set every pass: the packing macro modifies d23.
54441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d23, #255                      \n"
54541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGBTOARGB4444
54641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
54741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {q0}, [%3]!                    \n"  // store 8 pixels ARGB4444.
54841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
54941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),    // %0
55041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_u),    // %1
55141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(src_v),    // %2
55241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_argb4444),  // %3
55341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)     // %4
55441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),  // %5
55541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)    // %6
55641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
55741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
55841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
55941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
56041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TOARGB4444ROW_NEON
56141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
56241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YTOARGBROW_NEON
// Converts one row of Y-only input into 4-byte pixels, 8 pixels per loop
// pass, by running the same YUV422TORGB conversion used by the I422 routines
// on data loaded by READYUV400 (defined outside this view; presumably it
// loads 8 Y values and supplies neutral chroma -- TODO confirm).
//
// Per the channel comments in ARGBTORGB565 elsewhere in this file, the
// conversion leaves B in d20, G in d21 and R in d22.  d23 is filled with 255
// and d20-d23 are stored interleaved, so each stored pixel is the byte
// sequence B, G, R, 255.
//
// src_y: input plane, passed as read-write operand %0.
// dst_argb: output row; post-incremented by the vst4.8 (32 bytes per pass).
// width: pixel count, reduced by 8 per pass.  The count is only tested after
//     the body (subs ... bgt), so the loop runs at least once; callers
//     presumably pass a multiple of 8 (TODO confirm).
//
// Note the operand numbering differs from the I422 routines: the coefficient
// tables are %3 and %4 here because there are no U/V pointers.
56341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YToARGBRow_NEON(const uint8* src_y,
56441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     uint8* dst_argb,
56541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     int width) {
56641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // d24 <- 8 bytes at kUVToRB, d25 <- 8 bytes at kUVToG.
56741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
56841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d24}, [%3]                    \n"
56941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(4)
57041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {d25}, [%4]                    \n"
    // Per-lane constants 128 (u8), 74 and 16 (u16); presumably consumed by
    // READYUV400 / YUV422TORGB.
57141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #128                      \n"
57241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q14, #74                       \n"
57341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #16                       \n"
57441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
57541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
57641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    READYUV400
57741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    YUV422TORGB
57841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"
    // Opaque alpha plane, stored as the fourth byte of every pixel.
57941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d23, #255                      \n"
58041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
58141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"
58241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
58341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "+r"(src_y),     // %0
58441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(dst_argb),  // %1
58541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "+r"(width)      // %2
58641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "r"(&kUVToRB),   // %3
58741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "r"(&kUVToG)     // %4
58841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    : "cc", "memory", "q0", "q1", "q2", "q3",
58941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
59041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
59141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
59241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_YTOARGBROW_NEON
59341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_I400TOARGBROW_NEON
// Expands a row of 8-bit grey values to 32-bit ARGB by replicating each
// source byte into the first three channels and writing 255 to the fourth.
//   src_y    - source row; 8 bytes are read per iteration.
//   dst_argb - destination row; 32 bytes are written per iteration.
//   width    - pixel count; reduced by 8 per pass.  The loop body runs before
//              the count is tested, so at least 8 pixels are always written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d register names although this file is only built for __aarch64__.
// Presumably HAS_I400TOARGBROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void I400ToARGBRow_NEON(const uint8* src_y,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    ".p2align   2                              \n"
    "vmov.u8    d23, #255                      \n"  // 4th channel, set once
  "1:                                          \n"
    MEMACCESS(0)
    "vld1.8     {d20}, [%0]!                   \n"  // load 8 grey bytes
    "vmov       d21, d20                       \n"  // replicate into 2nd channel
    "vmov       d22, d20                       \n"  // replicate into 3rd channel
    "subs       %2, %2, #8                     \n"  // 8 processed per loop
    MEMACCESS(1)
    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"  // store 8 interleaved pixels
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
    :
    : "cc", "memory", "d20", "d21", "d22", "d23"
  );
}
#endif  // HAS_I400TOARGBROW_NEON
61841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_NV12TOARGBROW_NEON
// Converts a row of NV12 (Y plane plus interleaved chroma plane) to 32-bit
// ARGB, 8 pixels per iteration.
//   src_y    - luma row; presumably read and advanced by READNV12.
//   src_uv   - interleaved chroma row; presumably read and advanced by
//              READNV12 (macro defined outside this excerpt).
//   dst_argb - destination row; 32 bytes are written per iteration.
//   width    - pixel count; reduced by 8 per pass.  The loop body runs before
//              the count is tested, so at least 8 pixels are always written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d/q register names although this file is only built for __aarch64__.
// Presumably HAS_NV12TOARGBROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void NV12ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_uv,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    MEMACCESS(4)
    "vld1.8     {d24}, [%4]                    \n"  // 8 bytes of kUVToRB
    MEMACCESS(5)
    "vld1.8     {d25}, [%5]                    \n"  // 8 bytes of kUVToG
    // Constants presumably consumed by YUV422TORGB - verify against the macro.
    "vmov.u8    d26, #128                      \n"
    "vmov.u16   q14, #74                       \n"
    "vmov.u16   q15, #16                       \n"
    ".p2align   2                              \n"
  "1:                                          \n"
    READNV12
    YUV422TORGB
    "subs       %3, %3, #8                     \n"  // 8 processed per loop
    "vmov.u8    d23, #255                      \n"  // 4th channel = 255
    MEMACCESS(2)
    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"  // store 8 interleaved pixels
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_uv),    // %1
      "+r"(dst_argb),  // %2
      "+r"(width)      // %3
    : "r"(&kUVToRB),   // %4
      "r"(&kUVToG)     // %5
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_NV12TOARGBROW_NEON
65241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_NV21TOARGBROW_NEON
// Converts a row of NV21 (Y plane plus interleaved chroma plane) to 32-bit
// ARGB, 8 pixels per iteration.  Identical to NV12ToARGBRow_NEON except that
// the READNV21 macro is used to fetch the source data.
//   src_y    - luma row; presumably read and advanced by READNV21.
//   src_uv   - interleaved chroma row; presumably read and advanced by
//              READNV21 (macro defined outside this excerpt).
//   dst_argb - destination row; 32 bytes are written per iteration.
//   width    - pixel count; reduced by 8 per pass.  The loop body runs before
//              the count is tested, so at least 8 pixels are always written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d/q register names although this file is only built for __aarch64__.
// Presumably HAS_NV21TOARGBROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void NV21ToARGBRow_NEON(const uint8* src_y,
                        const uint8* src_uv,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    MEMACCESS(4)
    "vld1.8     {d24}, [%4]                    \n"  // 8 bytes of kUVToRB
    MEMACCESS(5)
    "vld1.8     {d25}, [%5]                    \n"  // 8 bytes of kUVToG
    // Constants presumably consumed by YUV422TORGB - verify against the macro.
    "vmov.u8    d26, #128                      \n"
    "vmov.u16   q14, #74                       \n"
    "vmov.u16   q15, #16                       \n"
    ".p2align   2                              \n"
  "1:                                          \n"
    READNV21
    YUV422TORGB
    "subs       %3, %3, #8                     \n"  // 8 processed per loop
    "vmov.u8    d23, #255                      \n"  // 4th channel = 255
    MEMACCESS(2)
    "vst4.8     {d20, d21, d22, d23}, [%2]!    \n"  // store 8 interleaved pixels
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_uv),    // %1
      "+r"(dst_argb),  // %2
      "+r"(width)      // %3
    : "r"(&kUVToRB),   // %4
      "r"(&kUVToG)     // %5
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_NV21TOARGBROW_NEON
68641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_NV12TORGB565ROW_NEON
// Converts a row of NV12 (Y plane plus interleaved chroma plane) to 16-bit
// RGB565, 8 pixels per iteration.
//   src_y      - luma row; presumably read and advanced by READNV12.
//   src_uv     - interleaved chroma row; presumably read and advanced by
//                READNV12 (macro defined outside this excerpt).
//   dst_rgb565 - destination row; 16 bytes (8 pixels x 2 bytes) are written
//                per iteration.
//   width      - pixel count; reduced by 8 per pass.  The loop body runs
//                before the count is tested, so at least 8 pixels are always
//                written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d/q register names although this file is only built for __aarch64__.
// Presumably HAS_NV12TORGB565ROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void NV12ToRGB565Row_NEON(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
                          int width) {
  asm volatile (
    MEMACCESS(4)
    "vld1.8     {d24}, [%4]                    \n"  // 8 bytes of kUVToRB
    MEMACCESS(5)
    "vld1.8     {d25}, [%5]                    \n"  // 8 bytes of kUVToG
    // Constants presumably consumed by YUV422TORGB - verify against the macro.
    "vmov.u8    d26, #128                      \n"
    "vmov.u16   q14, #74                       \n"
    "vmov.u16   q15, #16                       \n"
    ".p2align   2                              \n"
  "1:                                          \n"
    READNV12
    YUV422TORGB
    "subs       %3, %3, #8                     \n"  // 8 processed per loop
    ARGBTORGB565  // presumably packs the result into q0 - see macro
    MEMACCESS(2)
    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_uv),    // %1
      "+r"(dst_rgb565),  // %2
      "+r"(width)      // %3
    : "r"(&kUVToRB),   // %4
      "r"(&kUVToG)     // %5
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_NV12TORGB565ROW_NEON
72041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_NV21TORGB565ROW_NEON
// Converts a row of NV21 (Y plane plus interleaved chroma plane) to 16-bit
// RGB565, 8 pixels per iteration.  Identical to NV12ToRGB565Row_NEON except
// that the READNV21 macro is used to fetch the source data.
//   src_y      - luma row; presumably read and advanced by READNV21.
//   src_uv     - interleaved chroma row; presumably read and advanced by
//                READNV21 (macro defined outside this excerpt).
//   dst_rgb565 - destination row; 16 bytes (8 pixels x 2 bytes) are written
//                per iteration.
//   width      - pixel count; reduced by 8 per pass.  The loop body runs
//                before the count is tested, so at least 8 pixels are always
//                written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d/q register names although this file is only built for __aarch64__.
// Presumably HAS_NV21TORGB565ROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void NV21ToRGB565Row_NEON(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
                          int width) {
  asm volatile (
    MEMACCESS(4)
    "vld1.8     {d24}, [%4]                    \n"  // 8 bytes of kUVToRB
    MEMACCESS(5)
    "vld1.8     {d25}, [%5]                    \n"  // 8 bytes of kUVToG
    // Constants presumably consumed by YUV422TORGB - verify against the macro.
    "vmov.u8    d26, #128                      \n"
    "vmov.u16   q14, #74                       \n"
    "vmov.u16   q15, #16                       \n"
    ".p2align   2                              \n"
  "1:                                          \n"
    READNV21
    YUV422TORGB
    "subs       %3, %3, #8                     \n"  // 8 processed per loop
    ARGBTORGB565  // presumably packs the result into q0 - see macro
    MEMACCESS(2)
    "vst1.8     {q0}, [%2]!                    \n"  // store 8 pixels RGB565.
    "bgt        1b                             \n"
    : "+r"(src_y),     // %0
      "+r"(src_uv),    // %1
      "+r"(dst_rgb565),  // %2
      "+r"(width)      // %3
    : "r"(&kUVToRB),   // %4
      "r"(&kUVToG)     // %5
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_NV21TORGB565ROW_NEON
75441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_YUY2TOARGBROW_NEON
// Converts a row of packed YUY2 to 32-bit ARGB, 8 pixels per iteration.
//   src_yuy2 - packed source row; presumably read and advanced by the
//              READYUY2 macro (defined outside this excerpt).
//   dst_argb - destination row; 32 bytes are written per iteration.
//   width    - pixel count; reduced by 8 per pass.  The loop body runs before
//              the count is tested, so at least 8 pixels are always written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d/q register names although this file is only built for __aarch64__.
// Presumably HAS_YUY2TOARGBROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    MEMACCESS(3)
    "vld1.8     {d24}, [%3]                    \n"  // 8 bytes of kUVToRB
    MEMACCESS(4)
    "vld1.8     {d25}, [%4]                    \n"  // 8 bytes of kUVToG
    // Constants presumably consumed by YUV422TORGB - verify against the macro.
    "vmov.u8    d26, #128                      \n"
    "vmov.u16   q14, #74                       \n"
    "vmov.u16   q15, #16                       \n"
    ".p2align   2                              \n"
  "1:                                          \n"
    READYUY2
    YUV422TORGB
    "subs       %2, %2, #8                     \n"  // 8 processed per loop
    "vmov.u8    d23, #255                      \n"  // 4th channel = 255
    MEMACCESS(1)
    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"  // store 8 interleaved pixels
    "bgt        1b                             \n"
    : "+r"(src_yuy2),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
    : "r"(&kUVToRB),   // %3
      "r"(&kUVToG)     // %4
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_YUY2TOARGBROW_NEON
78641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_UYVYTOARGBROW_NEON
// Converts a row of packed UYVY to 32-bit ARGB, 8 pixels per iteration.
//   src_uyvy - packed source row; presumably read and advanced by the
//              READUYVY macro (defined outside this excerpt).
//   dst_argb - destination row; 32 bytes are written per iteration.
//   width    - pixel count; reduced by 8 per pass.  The loop body runs before
//              the count is tested, so at least 8 pixels are always written.
// NOTE(review): the instruction strings below still use ARMv7 NEON mnemonics
// and d/q register names although this file is only built for __aarch64__.
// Presumably HAS_UYVYTOARGBROW_NEON stays undefined until the routine is
// ported to AArch64 syntax - confirm in libyuv/row.h.
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    MEMACCESS(3)
    "vld1.8     {d24}, [%3]                    \n"  // 8 bytes of kUVToRB
    MEMACCESS(4)
    "vld1.8     {d25}, [%4]                    \n"  // 8 bytes of kUVToG
    // Constants presumably consumed by YUV422TORGB - verify against the macro.
    "vmov.u8    d26, #128                      \n"
    "vmov.u16   q14, #74                       \n"
    "vmov.u16   q15, #16                       \n"
    ".p2align   2                              \n"
  "1:                                          \n"
    READUYVY
    YUV422TORGB
    "subs       %2, %2, #8                     \n"  // 8 processed per loop
    "vmov.u8    d23, #255                      \n"  // 4th channel = 255
    MEMACCESS(1)
    "vst4.8     {d20, d21, d22, d23}, [%1]!    \n"  // store 8 interleaved pixels
    "bgt        1b                             \n"
    : "+r"(src_uyvy),  // %0
      "+r"(dst_argb),  // %1
      "+r"(width)      // %2
    : "r"(&kUVToRB),   // %3
      "r"(&kUVToG)     // %4
    : "cc", "memory", "q0", "q1", "q2", "q3",
      "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_UYVYTOARGBROW_NEON
81841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
//   src_uv - interleaved source row; 32 bytes are read per iteration.
//   dst_u  - receives the even-indexed source bytes, 16 per iteration.
//   dst_v  - receives the odd-indexed source bytes, 16 per iteration.
//   width  - number of UV pairs.  The loop body runs before the count is
//            tested, so width is presumably a positive multiple of 16.
#ifdef HAS_SPLITUVROW_NEON
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"  // load 16 pairs of UV
    // width is a 32-bit int, so the counter must use the W view of its
    // register: the upper half of the X register is not guaranteed to be
    // defined and would corrupt the flags tested by the branch.
    "subs       %w3, %w3, #16                  \n"  // 16 processed per loop
    MEMACCESS(1)
    "st1        {v0.16b}, [%1], #16            \n"  // store U
    MEMACCESS(2)
    "st1        {v1.16b}, [%2], #16            \n"  // store V
    "b.gt       1b                             \n"
    : "+r"(src_uv),  // %0
      "+r"(dst_u),   // %1
      "+r"(dst_v),   // %2
      "+r"(width)    // %3  // Output registers
    :                       // Input registers
    : "cc", "memory", "v0", "v1"  // Clobber List
  );
}
#endif  // HAS_SPLITUVROW_NEON
84341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Reads 16 U's and V's and writes out 16 pairs of UV.
//   src_u  - first source row; 16 bytes are read per iteration.
//   src_v  - second source row; 16 bytes are read per iteration.
//   dst_uv - destination row; 32 interleaved bytes are written per iteration,
//            with src_u bytes at even offsets and src_v bytes at odd offsets.
//   width  - number of UV pairs.  The loop body runs before the count is
//            tested, so width is presumably a positive multiple of 16.
#ifdef HAS_MERGEUVROW_NEON
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                     int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], #16            \n"  // load U
    MEMACCESS(1)
    "ld1        {v1.16b}, [%1], #16            \n"  // load V
    // width is a 32-bit int, so the counter must use the W view of its
    // register: the upper half of the X register is not guaranteed to be
    // defined and would corrupt the flags tested by the branch.
    "subs       %w3, %w3, #16                  \n"  // 16 processed per loop
    MEMACCESS(2)
    "st2        {v0.16b, v1.16b}, [%2], #32    \n"  // store 16 pairs of UV
    "b.gt       1b                             \n"
    : "+r"(src_u),   // %0
      "+r"(src_v),   // %1
      "+r"(dst_uv),  // %2
      "+r"(width)    // %3  // Output registers
    :                       // Input registers
    : "cc", "memory", "v0", "v1"  // Clobber List
  );
}
#endif  // HAS_MERGEUVROW_NEON
86941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Copies 'count' bytes from src to dst, 32 bytes per iteration, using a
// four-register ld1/st1 pair (4 x 8 bytes).
//   src   - source row; advanced by 32 each pass.
//   dst   - destination row; advanced by 32 each pass.
//   count - byte count.  The loop body runs before the count is tested, so
//           count is presumably a positive multiple of 32.
#ifdef HAS_COPYROW_NEON
void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.8b-v3.8b}, [%0], #32       \n"  // load 32
    // count is a 32-bit int, so the counter must use the W view of its
    // register: the upper half of the X register is not guaranteed to be
    // defined and would corrupt the flags tested by the branch.
    "subs       %w2, %w2, #32                  \n"  // 32 processed per loop
    MEMACCESS(1)
    "st1        {v0.8b-v3.8b}, [%1], #32       \n"  // store 32
    "b.gt       1b                             \n"
  : "+r"(src),   // %0
    "+r"(dst),   // %1
    "+r"(count)  // %2  // Output registers
  :                     // Input registers
  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
  );
}
#endif  // HAS_COPYROW_NEON
89041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
89141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// SetRow8 writes 'count' bytes using a 32 bit value repeated.
89241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_SETROW_NEON
89341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid SetRow_NEON(uint8* dst, uint32 v32, int count) {
89441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
895d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "dup        v0.4s, %w2                     \n"  // duplicate 4 ints
89641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "1:                                        \n"
89741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs      %1, %1, #16                     \n"  // 16 bytes per loop
89841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
899d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.16b}, [%0], #16            \n"  // store
90041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt       1b                              \n"
90141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(dst),   // %0
90241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(count)  // %1
90341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "r"(v32)     // %2
904d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0"
90541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
90641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
90741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_SETROW_NEON
90841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
90941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): Make fully assembler
91041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// SetRow32 writes 'count' words using a 32 bit value repeated.
91141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSETROWS_NEON
91241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
91341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      int dst_stride, int height) {
91441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  for (int y = 0; y < height; ++y) {
91541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    SetRow_NEON(dst, v32, width << 2);
91641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    dst += dst_stride;
91741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  }
91841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
91941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBSETROWS_NEON
92041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
92141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_MIRRORROW_NEON
92241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
92341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
92441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // Start at end of source row.
92541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %0, %0, %2                     \n"
926d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "sub        %0, %0, #16                    \n"
92741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
92841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
92941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
93041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
931d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
932d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "subs       %2, %2, #16                    \n"  // 16 pixels per loop.
933d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rev64      v0.16b, v0.16b                 \n"
93441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
935d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
93641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
937d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.D}[0], [%1], #8            \n"
93841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
93941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src),   // %0
94041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst),   // %1
94141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)  // %2
942d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "r"((ptrdiff_t)-16)    // %3
943d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0"
94441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
94541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
94641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_MIRRORROW_NEON
94741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
94841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_MIRRORUVROW_NEON
94941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
95041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      int width) {
95141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
95241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // Start at end of source row.
95341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %0, %0, %3, lsl #1             \n"
954d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "sub        %0, %0, #16                    \n"
95541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
95641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
95741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
95841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
959d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld2        {v0.8b, v1.8b}, [%0], %4       \n"  // src -= 16
960d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "subs       %3, %3, #8                     \n"  // 8 pixels per loop.
961d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rev64      v0.8b, v0.8b                   \n"
962d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rev64      v1.8b, v1.8b                   \n"
96341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
964d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.8b}, [%1], #8               \n"  // dst += 8
96541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
966d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v1.8b}, [%2], #8               \n"
96741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
96841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_uv),  // %0
96941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),   // %1
97041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),   // %2
97141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)    // %3
972d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "r"((ptrdiff_t)-16)      // %4
973d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1"
97441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
97541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
97641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_MIRRORUVROW_NEON
97741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
97841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBMIRRORROW_NEON
97941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
98041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
98141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // Start at end of source row.
98241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %0, %0, %2, lsl #2             \n"
983d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "sub        %0, %0, #16                    \n"
98441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
98541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
98641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
98741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
988d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
989d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
990d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rev64      v0.4s, v0.4s                   \n"
99141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
992d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
99341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
994d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.D}[0], [%1], #8            \n"
99541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
99641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src),   // %0
99741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst),   // %1
99841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)  // %2
999d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "r"((ptrdiff_t)-16)    // %3
1000d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0"
100141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
100241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
100341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBMIRRORROW_NEON
100441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
100541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB24TOARGBROW_NEON
100641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
100741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
1008d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "movi       v4.8b, #255                    \n"  // Alpha
100941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
101041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
101141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1012d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld3        {v1.8b-v3.8b}, [%0], #24       \n"  // load 8 pixels of RGB24.
101341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
101441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1015d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st4        {v1.8b-v4.8b}, [%1], #32       \n"  // store 8 pixels of ARGB.
101641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
101741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgb24),  // %0
101841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),   // %1
101941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)         // %2
102041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1021d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v1", "v2", "v3", "v4"  // Clobber List
102241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
102341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
102441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGB24TOARGBROW_NEON
102541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
102641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RAWTOARGBROW_NEON
102741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
102841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
1029d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "movi       v5.8b, #255                    \n"  // Alpha
103041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
103141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
103241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1033d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld3        {v0.8b-v2.8b}, [%0], #24       \n"  // read r g b
103441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1035d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "mov        v3.8b, v1.8b                   \n"  // move g
1036d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "mov        v4.8b, v0.8b                   \n"  // move r
103741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1038d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st4        {v2.8b-v5.8b}, [%1], #32       \n"  // store b g r a
103941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
104041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_raw),   // %0
104141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),  // %1
104241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
104341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1044d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5"  // Clobber List
104541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
104641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
104741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RAWTOARGBROW_NEON
104841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// RGB565TOARGB: asm fragment that expands the 8 RGB565 pixels held in q0 to
// 8 bit channels, leaving B in d0, G in d1 and R in d2. Each channel is moved
// to the top of its byte and its upper bits are OR-ed back into the vacated
// low bits. d4, d5 and d6 are used as scratch.
// NOTE(review): the fragment is written in 32 bit ARM NEON syntax (v-prefixed
// mnemonics, d/q registers) although this module is compiled only when
// __aarch64__ is defined - presumably its users stay disabled until it is
// ported; confirm before enabling.
104941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define RGB565TOARGB                                                           \
105041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshrn.u16  d6, q0, #5                     \n"  /* G xxGGGGGG           */ \
105141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB RRRRRxxx */ \
105241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    d6, d6, #2                     \n"  /* G GGGGGG00 upper 6   */ \
105341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d1, d1, #3                     \n"  /* R 000RRRRR lower 5   */ \
105441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
105541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
105641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
105741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d4, d6, #6                     \n"  /* G 000000GG lower 2   */ \
105841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
105941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
106041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
106141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB565TOARGBROW_NEON
// Converts RGB565 pixels to 4 byte pixels with the fourth byte set to 255.
// 'pix' is in pixels; 8 pixels are handled per iteration (16 bytes read,
// 32 bytes written). d3 is loaded with 255 once before the loop, and the
// RGB565TOARGB fragment fills d0..d2 on every iteration. The loop body always
// executes at least once and there is no tail handling.
// NOTE(review): the asm uses 32 bit ARM NEON syntax (vld1.8/vst4.8, d/q
// registers, "[%0]!" writeback) inside a module guarded by __aarch64__ -
// presumably HAS_RGB565TOARGBROW_NEON is left undefined there until this is
// ported; confirm before enabling.
106241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
106341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
106441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d3, #255                       \n"  // Alpha
106541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
106641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
106741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
106841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
106941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
107041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB565TOARGB
107141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
107241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
107341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
107441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgb565),  // %0
107541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),    // %1
107641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)          // %2
107741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
107841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
107941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
108041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
108141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGB565TOARGBROW_NEON
108241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// ARGB1555TOARGB: asm fragment that expands the 8 ARGB1555 pixels held in q0
// to 8 bit channels, leaving B,G in q0 and R,A in q1. The 5 bit colour
// channels are moved to the top of the byte and their upper 3 bits OR-ed into
// the low bits; the single alpha bit is isolated and negated so it becomes
// 0x00 or 0xFF. q2 and q3 are used as scratch.
// The last line of the macro ends with a line continuation, so the macro
// extends onto the line that follows it; that line has to stay empty.
// NOTE(review): written in 32 bit ARM NEON syntax although this module is
// compiled only when __aarch64__ is defined - confirm it is ported before use.
108341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGB1555TOARGB                                                         \
108441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshrn.u16  d7, q0, #8                     \n"  /* A Arrrrrxx           */ \
108541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d6, d7, #2                     \n"  /* R xxxRRRRR           */ \
108641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshrn.u16  d5, q0, #5                     \n"  /* G xxxGGGGG           */ \
108741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovn.u16  d4, q0                         \n"  /* B xxxBBBBB           */ \
108841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d7, d7, #7                     \n"  /* A 0000000A           */ \
108941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vneg.s8    d7, d7                         \n"  /* A AAAAAAAA upper 8   */ \
109041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    d6, d6, #3                     \n"  /* R RRRRR000 upper 5   */ \
109141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q1, q3, #5                     \n"  /* R,A 00000RRR lower 3 */ \
109241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    q0, q2, #3                     \n"  /* B,G BBBBB000 upper 5 */ \
109341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q2, q0, #5                     \n"  /* B,G 00000BBB lower 3 */ \
109441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    q1, q1, q3                     \n"  /* R,A                  */ \
109541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    q0, q0, q2                     \n"  /* B,G                  */ \
109641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
109741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha.
// Asm fragment: takes 8 packed 16 bit pixels in q0 and leaves B in d0, G in
// d1 and R in d2, each 5 bit channel widened to 8 bits by replicating its
// upper 3 bits into the low bits. d4, d5 and d6 are used as scratch; no alpha
// value is produced.
// NOTE(review): written in 32 bit ARM NEON syntax although this module is
// compiled only when __aarch64__ is defined - confirm it is ported before use.
109841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define RGB555TOARGB                                                           \
109941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshrn.u16  d6, q0, #5                     \n"  /* G xxxGGGGG           */ \
110041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vuzp.u8    d0, d1                         \n"  /* d0 xxxBBBBB xRRRRRxx */ \
110141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    d6, d6, #3                     \n"  /* G GGGGG000 upper 5   */ \
110241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d1, d1, #2                     \n"  /* R 00xRRRRR lower 5   */ \
110341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    q0, q0, #3                     \n"  /* B,R BBBBB000 upper 5 */ \
110441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q2, q0, #5                     \n"  /* B,R 00000BBB lower 3 */ \
110541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    d0, d0, d4                     \n"  /* B                    */ \
110641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    d4, d6, #5                     \n"  /* G 00000GGG lower 3   */ \
110741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    d2, d1, d5                     \n"  /* R                    */ \
110841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    d1, d4, d6                     \n"  /* G                    */
110941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
111041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB1555TOARGBROW_NEON
// Converts ARGB1555 pixels to 4 byte ARGB pixels. 'pix' is in pixels;
// 8 pixels are handled per iteration (16 bytes read, 32 bytes written) using
// the ARGB1555TOARGB fragment. The loop body always executes at least once
// and there is no tail handling.
// NOTE(review): ARGB1555TOARGB writes q1, which in the 32 bit ARM register
// model overlaps d2/d3, so the d3 = 255 set before the loop looks like it is
// replaced by the alpha computed from the source - confirm.
// NOTE(review): the asm uses 32 bit ARM NEON syntax inside a module guarded
// by __aarch64__ - presumably HAS_ARGB1555TOARGBROW_NEON is left undefined
// there until this is ported; confirm before enabling.
111141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
111241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            int pix) {
111341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
111441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d3, #255                       \n"  // Alpha
111541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
111641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
111741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
111841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
111941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
112041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB1555TOARGB
112141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
112241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
112341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
112441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb1555),  // %0
112541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),    // %1
112641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)          // %2
112741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
112841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3"  // Clobber List
112941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
113041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
113141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGB1555TOARGBROW_NEON
113241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// ARGB4444TOARGB: asm fragment that expands the 8 ARGB4444 pixels held in q0
// to 8 bit channels. The two bytes of each pixel are first separated into
// d0/d1, every 4 bit nibble is then duplicated into both halves of its own
// byte, and the final swap of d1 and d2 puts the result in B, G, R, A order
// in d0..d3. q2 is used as scratch.
// NOTE(review): written in 32 bit ARM NEON syntax although this module is
// compiled only when __aarch64__ is defined - confirm it is ported before use.
113341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGB4444TOARGB                                                         \
113441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vuzp.u8    d0, d1                         \n"  /* d0 BG, d1 RA         */ \
113541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    q2, q0, #4                     \n"  /* B,R BBBB0000         */ \
113641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q1, q0, #4                     \n"  /* G,A 0000GGGG         */ \
113741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u8    q0, q2, #4                     \n"  /* B,R 0000BBBB         */ \
113841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    q0, q0, q2                     \n"  /* B,R BBBBBBBB         */ \
113941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshl.u8    q2, q1, #4                     \n"  /* G,A GGGG0000         */ \
114041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vorr.u8    q1, q1, q2                     \n"  /* G,A GGGGGGGG         */ \
114141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vswp.u8    d1, d2                         \n"  /* B,R,G,A -> B,G,R,A   */
114241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
114341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB4444TOARGBROW_NEON
// Converts ARGB4444 pixels to 4 byte ARGB pixels. 'pix' is in pixels;
// 8 pixels are handled per iteration (16 bytes read, 32 bytes written) using
// the ARGB4444TOARGB fragment. The loop body always executes at least once
// and there is no tail handling.
// NOTE(review): ARGB4444TOARGB writes q1, which in the 32 bit ARM register
// model overlaps d2/d3, so the d3 = 255 set before the loop looks like it is
// replaced by the alpha expanded from the source - confirm.
// NOTE(review): the asm uses 32 bit ARM NEON syntax inside a module guarded
// by __aarch64__ - presumably HAS_ARGB4444TOARGBROW_NEON is left undefined
// there until this is ported; confirm before enabling.
114441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
114541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            int pix) {
114641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
114741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d3, #255                       \n"  // Alpha
114841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
114941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
115041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
115141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
115241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
115341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB4444TOARGB
115441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
115541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
115641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
115741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb4444),  // %0
115841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),    // %1
115941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)          // %2
116041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
116141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2"  // Clobber List
116241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
116341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
116441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGB4444TOARGBROW_NEON
116541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
116641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTORGB24ROW_NEON
116741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) {
116841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
116941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
117041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
117141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1172d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v1.8b-v4.8b}, [%0], #32       \n"  // load 8 pixels of ARGB.
117341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
117441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1175d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st3        {v1.8b-v3.8b}, [%1], #24       \n"  // store 8 pixels of RGB24.
117641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
117741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),   // %0
117841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_rgb24),  // %1
117941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)         // %2
118041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1181d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v1", "v2", "v3", "v4"  // Clobber List
118241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
118341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
118441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTORGB24ROW_NEON
118541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
118641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTORAWROW_NEON
118741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) {
118841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
118941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
119041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
119141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1192d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v1.8b-v4.8b}, [%0], #32       \n"  // load b g r a
119341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
1194d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "mov        v4.8b, v2.8b                   \n"  // mov g
1195d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "mov        v5.8b, v1.8b                   \n"  // mov b
119641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1197d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st3        {v3.8b-v5.8b}, [%1], #24       \n"  // store r g b
119841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
119941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
120041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_raw),   // %1
120141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
120241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1203d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v1", "v2", "v3", "v4", "v5"  // Clobber List
120441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
120541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
120641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTORAWROW_NEON
120741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
120841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOYROW_NEON
120941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) {
121041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
121141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
121241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
121341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1214d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"  // load 16 pixels of YUY2.
121541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
121641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1217d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.16b}, [%1], #16            \n"  // store 16 pixels of Y.
121841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
121941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_yuy2),  // %0
122041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),     // %1
122141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
122241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1223d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1"  // Clobber List
122441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
122541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
122641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_YUY2TOYROW_NEON
122741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
122841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_UYVYTOYROW_NEON
122941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) {
123041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
123141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
123241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
123341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1234d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld2        {v0.16b, v1.16b}, [%0], #32    \n"  // load 16 pixels of UYVY.
123541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #16                    \n"  // 16 processed per loop.
123641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1237d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v1.16b}, [%1], #16            \n"  // store 16 pixels of Y.
123841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
123941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_uyvy),  // %0
124041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),     // %1
124141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
124241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1243d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1"  // Clobber List
124441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
124541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
124641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_UYVYTOYROW_NEON
124741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
124841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOUV422ROW_NEON
124941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
125041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         int pix) {
125141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
125241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
125341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
125441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1255d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of YUY2.
125641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
125741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1258d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v1.8b}, [%1], #8              \n"  // store 8 U.
125941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
1260d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v3.8b}, [%2], #8              \n"  // store 8 V.
126141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
126241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_yuy2),  // %0
126341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %1
126441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %2
126541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %3
126641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1267d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
126841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
126941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
127041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_YUY2TOUV422ROW_NEON
127141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
127241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_UYVYTOUV422ROW_NEON
127341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
127441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         int pix) {
127541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
127641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
127741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
127841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1279d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of UYVY.
128041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %3, %3, #16                    \n"  // 16 pixels = 8 UVs.
128141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1282d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.8b}, [%1], #8              \n"  // store 8 U.
128341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
1284d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v2.8b}, [%2], #8              \n"  // store 8 V.
128541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
128641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_uyvy),  // %0
128741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %1
128841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %2
128941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %3
129041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1291d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
129241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
129341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
129441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_UYVYTOUV422ROW_NEON
129541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
129641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOUVROW_NEON
129741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
129841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      uint8* dst_u, uint8* dst_v, int pix) {
129941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
1300d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "add        %x1, %x0, %w1, sxtw            \n"  // stride + src_yuy2
130141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
130241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
130341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1304d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of YUY2.
130541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
130641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1307d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load next row YUY2.
1308d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "urhadd     v1.8b, v1.8b, v5.8b            \n"  // average rows of U
1309d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "urhadd     v3.8b, v3.8b, v7.8b            \n"  // average rows of V
131041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
1311d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v1.8b}, [%2], #8              \n"  // store 8 U.
131241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
1313d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v3.8b}, [%3], #8              \n"  // store 8 V.
131441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
131541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_yuy2),     // %0
131641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(stride_yuy2),  // %1
131741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),        // %2
131841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),        // %3
131941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)           // %4
132041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1321d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"  // Clobber List
132241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
132341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
132441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_YUY2TOUVROW_NEON
132541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
132641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_UYVYTOUVROW_NEON
132741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
132841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      uint8* dst_u, uint8* dst_v, int pix) {
132941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
1330d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "add        %x1, %x0, %w1, sxtw            \n"  // stride + src_uyvy
133141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
133241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
133341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1334d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 16 pixels of UYVY.
133541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 pixels = 8 UVs.
133641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1337d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load next row UYVY.
1338d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "urhadd     v0.8b, v0.8b, v4.8b            \n"  // average rows of U
1339d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "urhadd     v2.8b, v2.8b, v6.8b            \n"  // average rows of V
134041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
1341d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.8b}, [%2], #8              \n"  // store 8 U.
134241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
1343d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v2.8b}, [%3], #8              \n"  // store 8 V.
134441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
134541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_uyvy),     // %0
134641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(stride_uyvy),  // %1
134741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),        // %2
134841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),        // %3
134941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)           // %4
135041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1351d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"  // Clobber List
135241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
135341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
135441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_UYVYTOUVROW_NEON
135541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
135641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_HALFROW_NEON
135741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
135841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                  uint8* dst_uv, int pix) {
135941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
136041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // change the stride to row 2 pointer
1361d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "add        %x1, %x0, %w1, sxtw            \n"
136241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
136341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1364d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v0.16b}, [%0], #16            \n"  // load row 1 16 pixels.
136541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %3, %3, #16                    \n"  // 16 processed per loop
136641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1367d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v1.16b}, [%1], #16            \n"  // load row 2 16 pixels.
1368d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "urhadd     v0.16b, v0.16b, v1.16b         \n"  // average row 1 and 2
136941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
1370d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v0.16b}, [%2], #16            \n"
137141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
137241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_uv),         // %0
137341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_uv_stride),  // %1
137441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_uv),         // %2
137541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)             // %3
137641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1377d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1"  // Clobber List
137841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
137941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
138041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_HALFROW_NEON
138141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
138241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
138341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOBAYERROW_NEON
138441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
138541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         uint32 selector, int pix) {
138641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
1387d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "mov        v2.s[0], %w3                   \n"  // selector
138841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
138941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1390d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v0.16b, v1.16b}, [%0], 32     \n"  // load row 8 pixels.
139141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop
1392d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "tbl        v4.8b, {v0.16b}, v2.8b         \n"  // look up 4 pixels
1393d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "tbl        v5.8b, {v1.16b}, v2.8b         \n"  // look up 4 pixels
1394d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "trn1       v4.4s, v4.4s, v5.4s            \n"  // combine 8 pixels
139541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1396d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v4.8b}, [%1], #8              \n"  // store 8.
139741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
139841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),   // %0
139941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_bayer),  // %1
140041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)         // %2
140141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "r"(selector)     // %3
1402d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v4", "v5"   // Clobber List
140341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
140441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
140541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOBAYERROW_NEON
140641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
140741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Select G channels from ARGB.  e.g.  GGGGGGGG
140841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOBAYERGGROW_NEON
140941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
141041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                           uint32 /*selector*/, int pix) {
141141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
141241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
141341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1414d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load row 8 pixels.
141541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop
141641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1417d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v1.8b}, [%1], #8              \n"  // store 8 G's.
141841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
141941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),   // %0
142041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_bayer),  // %1
142141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)         // %2
142241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1423d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
142441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
142541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
142641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOBAYERGGROW_NEON
142741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
142841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
142941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSHUFFLEROW_NEON
143041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
143141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         const uint8* shuffler, int pix) {
143241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
143341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
1434d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v2.16b}, [%3]                 \n"  // shuffler
143541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
143641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1437d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v0.16b}, [%0], #16            \n"  // load 4 pixels.
143841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #4                     \n"  // 4 processed per loop
1439d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "tbl        v1.16b, {v0.16b}, v2.16b       \n"  // look up 4 pixels
144041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1441d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st1        {v1.16b}, [%1], #16            \n"  // store 4.
144241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
144341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
144441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),  // %1
144541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
144641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "r"(shuffler)    // %3
1447d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2"  // Clobber List
144841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
144941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
145041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBSHUFFLEROW_NEON
145141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
145241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOYUY2ROW_NEON
145341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToYUY2Row_NEON(const uint8* src_y,
145441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_u,
145541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        const uint8* src_v,
145641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        uint8* dst_yuy2, int width) {
145741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
145841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
145941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
146041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
1461d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld2        {v0.8b, v1.8b}, [%0], #16      \n"  // load 16 Ys
1462d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "mov        v2.8b, v1.8b                   \n"
146341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
1464d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v1.8b}, [%1], #8              \n"  // load 8 Us
146541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
1466d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld1        {v3.8b}, [%2], #8              \n"  // load 8 Vs
146741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 pixels
146841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
1469d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st4        {v0.8b-v3.8b}, [%3], #32       \n"  // Store 8 YUY2/16 pixels.
147041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
147141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_y),     // %0
147241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_u),     // %1
147341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_v),     // %2
147441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_yuy2),  // %3
147541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)      // %4
147641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
1477d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3"
147841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
147941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
148041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_I422TOYUY2ROW_NEON
148141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_I422TOUYVYROW_NEON
// Packs one row of planar I422 (separate Y, U and V pointers) into interleaved
// UYVY. Each loop iteration consumes 16 Y, 8 U and 8 V bytes and writes
// 32 bytes (16 pixels) laid out as U, Y-even, V, Y-odd.
//
// The loop body runs before the counter is tested and the counter steps by 16,
// so width is expected to be a positive multiple of 16 (presumably guaranteed
// by the caller - confirm).
void I422ToUYVYRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_uyvy, int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // ld2 de-interleaves the Y bytes: even-indexed Ys -> v1, odd-indexed -> v2.
    "ld2        {v1.8b, v2.8b}, [%0], #16      \n"  // load 16 Ys
    // Move the odd Ys out of the way; v2 is about to be reused for V.
    "mov        v3.8b, v2.8b                   \n"
    MEMACCESS(1)
    "ld1        {v0.8b}, [%1], #8              \n"  // load 8 Us
    MEMACCESS(2)
    "ld1        {v2.8b}, [%2], #8              \n"  // load 8 Vs
    // width is a 32-bit int, so operate on the W view of its register. The
    // plain %4 form names the 64-bit X register, whose upper half is not
    // guaranteed to be defined for an int operand.
    "subs       %w4, %w4, #16                  \n"  // 16 pixels
    MEMACCESS(3)
    // st4 re-interleaves v0..v3 byte by byte: U, Y-even, V, Y-odd.
    "st4        {v0.8b-v3.8b}, [%3], #32       \n"  // Store 8 UYVY/16 pixels.
    "b.gt       1b                             \n"
  : "+r"(src_y),     // %0
    "+r"(src_u),     // %1
    "+r"(src_v),     // %2
    "+r"(dst_uyvy),  // %3
    "+r"(width)      // %4
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_I422TOUYVYROW_NEON
151141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Converts a row of 32-bit ARGB pixels to 16-bit RGB565, 8 pixels per loop
// iteration (32 bytes read, 16 bytes written). The actual bit packing is done
// by the ARGBTORGB565 macro, which is defined outside this block.
//
// The loop body runs before pix is tested and pix steps by 8, so pix is
// expected to be a positive multiple of 8 (presumably guaranteed by the
// caller - confirm).
//
// NOTE(review): the body is still written in ARMv7 NEON syntax (vld4.8, d/q
// register names) although this file is guarded by __aarch64__; presumably
// HAS_ARGBTORGB565ROW_NEON is not defined for AArch64 builds yet - confirm.
151241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTORGB565ROW_NEON
151341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
151441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
151541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
151641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
151741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
    // De-interleaving load: one colour channel per d register (d20..d23).
151841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
151941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    // Macro body is not visible here; the store below takes its result from q0.
152041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGBTORGB565
152141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
152241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
    // Flags are still those set by the subs above: loop while pix > 0.
152341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
152441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
152541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_rgb565),  // %1
152641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
152741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
  // d20..d23 are q10/q11; q8/q9 are presumably scratch for the macro.
152841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
152941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
153041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
153141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTORGB565ROW_NEON
153241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Converts a row of 32-bit ARGB pixels to 16-bit ARGB1555, 8 pixels per loop
// iteration (32 bytes read, 16 bytes written). The actual bit packing is done
// by the ARGBTOARGB1555 macro, which is defined outside this block.
//
// The loop body runs before pix is tested and pix steps by 8, so pix is
// expected to be a positive multiple of 8 (presumably guaranteed by the
// caller - confirm).
//
// NOTE(review): the body is still written in ARMv7 NEON syntax (vld4.8, d/q
// register names) although this file is guarded by __aarch64__; presumably
// HAS_ARGBTOARGB1555ROW_NEON is not defined for AArch64 builds yet - confirm.
153341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOARGB1555ROW_NEON
153441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
153541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            int pix) {
153641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
153741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
153841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
153941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
    // De-interleaving load: one colour channel per d register (d20..d23).
154041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
154141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    // Macro body is not visible here; the store below takes its result from q0.
154241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGBTOARGB1555
154341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
154441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
    // Flags are still those set by the subs above: loop while pix > 0.
154541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
154641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
154741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb1555),  // %1
154841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
154941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
  // d20..d23 are q10/q11; q8/q9 are presumably scratch for the macro.
155041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
155141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
155241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
155341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOARGB1555ROW_NEON
155441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Converts a row of 32-bit ARGB pixels to 16-bit ARGB4444, 8 pixels per loop
// iteration (32 bytes read, 16 bytes written). The actual bit packing is done
// by the ARGBTOARGB4444 macro, which is defined outside this block.
//
// The loop body runs before pix is tested and pix steps by 8, so pix is
// expected to be a positive multiple of 8 (presumably guaranteed by the
// caller - confirm).
//
// NOTE(review): the body is still written in ARMv7 NEON syntax (vmov.u8,
// vld4.8, d/q register names) although this file is guarded by __aarch64__;
// presumably HAS_ARGBTOARGB4444ROW_NEON is not defined for AArch64 builds
// yet - confirm.
155541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOARGB4444ROW_NEON
155641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
155741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                            int pix) {
155841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // Loop-invariant mask, set once before the loop; presumably consumed by
    // the ARGBTOARGB4444 macro.
    // NOTE(review): d4 is written here, but the clobber list below names only
    // q0 and q8..q11 (in ARMv7 NEON d4 is the low half of q2) - looks like a
    // missing clobber; confirm when this function is ported.
155941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
156041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
156141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
156241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
    // De-interleaving load: one colour channel per d register (d20..d23).
156341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
156441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    // Macro body is not visible here; the store below takes its result from q0.
156541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGBTOARGB4444
156641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
156741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
    // Flags are still those set by the subs above: loop while pix > 0.
156841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
156941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),      // %0
157041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb4444),  // %1
157141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)            // %2
157241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
157341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
157441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
157541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
157641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOARGB4444ROW_NEON
157741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_ARGBTOYROW_NEON
// Computes one 8-bit luma value per ARGB pixel, 8 pixels per loop iteration:
//   Y = sat_u8(16 + sat_u8((13 * B + 65 * G + 33 * R + 64) >> 7))
// The alpha channel is loaded but never used.
//
// The loop body runs before the counter is tested and the counter steps by 8,
// so pix is expected to be a positive multiple of 8 (presumably guaranteed by
// the caller - confirm).
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
  asm volatile (
    // Loop-invariant constants, replicated into every byte lane.
    "movi       v4.8b, #13                     \n"  // B * 0.1016 coefficient
    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
    "movi       v6.8b, #33                     \n"  // R * 0.2578 coefficient
    "movi       v7.8b, #16                     \n"  // Add 16 constant
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // De-interleaving load: v0 = B, v1 = G, v2 = R, v3 = A.
    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
    // pix is a 32-bit int, so operate on the W view of its register. The plain
    // %2 form names the 64-bit X register, whose upper half is not guaranteed
    // to be defined for an int operand.
    "subs       %w2, %w2, #8                   \n"  // 8 processed per loop.
    // v3 (alpha) is not needed, so it is reused as the 16-bit accumulator.
    "umull      v3.8h, v0.8b, v4.8b            \n"  // B
    "umlal      v3.8h, v1.8b, v5.8b            \n"  // G
    "umlal      v3.8h, v2.8b, v6.8b            \n"  // R
    // Rounding shift right by 7 with saturating narrow to unsigned 8 bit.
    "sqrshrun   v0.8b, v3.8h, #7               \n"  // 16 bit to 8 bit Y
    "uqadd      v0.8b, v0.8b, v7.8b            \n"  // saturating +16 offset
    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
    "b.gt       1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_y),     // %1
    "+r"(pix)        // %2
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
  );
}
#endif  // HAS_ARGBTOYROW_NEON
160641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_ARGBTOYJROW_NEON
// Computes one 8-bit luma value per ARGB pixel, 8 pixels per loop iteration,
// with no +16 offset:
//   Y = sat_u8((15 * B + 75 * G + 38 * R + 64) >> 7)
// The alpha channel is loaded but never used.
//
// The loop body runs before the counter is tested and the counter steps by 8,
// so pix is expected to be a positive multiple of 8 (presumably guaranteed by
// the caller - confirm).
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
  asm volatile (
    // Loop-invariant coefficients, replicated into every byte lane.
    "movi       v4.8b, #15                     \n"  // B * 0.11400 coefficient
    "movi       v5.8b, #75                     \n"  // G * 0.58700 coefficient
    "movi       v6.8b, #38                     \n"  // R * 0.29900 coefficient
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // De-interleaving load: v0 = B, v1 = G, v2 = R, v3 = A.
    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
    // pix is a 32-bit int, so operate on the W view of its register. The plain
    // %2 form names the 64-bit X register, whose upper half is not guaranteed
    // to be defined for an int operand.
    "subs       %w2, %w2, #8                   \n"  // 8 processed per loop.
    // v3 (alpha) is not needed, so it is reused as the 16-bit accumulator.
    "umull      v3.8h, v0.8b, v4.8b            \n"  // B
    "umlal      v3.8h, v1.8b, v5.8b            \n"  // G
    "umlal      v3.8h, v2.8b, v6.8b            \n"  // R
    // Rounding shift right by 7 with saturating narrow to unsigned 8 bit.
    "sqrshrun   v0.8b, v3.8h, #7               \n"  // 15 bit to 8 bit Y
    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
    "b.gt       1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_y),     // %1
    "+r"(pix)        // %2
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
  );
}
#endif  // HAS_ARGBTOYJROW_NEON
163341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Computes one U and one V byte per ARGB pixel (no chroma subsampling),
// 8 pixels per loop iteration:
//   U = sat_u8((112 * B - 74 * G - 38 * R + 0x8080) >> 8)
//   V = sat_u8((112 * R - 94 * G - 18 * B + 0x8080) >> 8)
// The alpha channel (d3) is loaded but never used.
//
// The loop body runs before pix is tested and pix steps by 8, so pix is
// expected to be a positive multiple of 8 (presumably guaranteed by the
// caller - confirm).
//
// NOTE(review): the body is still written in ARMv7 NEON syntax (vmov.u8,
// vld4.8, d/q register names) although this file is guarded by __aarch64__;
// presumably HAS_ARGBTOUV444ROW_NEON is not defined for AArch64 builds yet -
// confirm.
163441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 8x1 pixels.
163541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOUV444ROW_NEON
163641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
163741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         int pix) {
163841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // Loop-invariant coefficient magnitudes; the signs shown in the trailing
    // comments are applied by using vmlsl (multiply-subtract) in the loop.
163941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d24, #112                      \n"  // UB / VR 0.875 coefficient
164041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d25, #74                       \n"  // UG -0.5781 coefficient
164141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #38                       \n"  // UR -0.2969 coefficient
164241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d27, #18                       \n"  // VB -0.1406 coefficient
164341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d28, #94                       \n"  // VG -0.7344 coefficient
    // 0x8080 in each 16-bit lane: 128 in the high byte (the bias that survives
    // the >> 8 below) plus 0x80 in the low byte (rounding).
164441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
164541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
164641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
164741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
    // De-interleaving load: d0 = B, d1 = G, d2 = R, d3 = A.
164841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
164941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
    // U accumulator in q2 (16-bit lanes).
165041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q2, d0, d24                    \n"  // B
165141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlsl.u8   q2, d1, d25                    \n"  // G
165241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlsl.u8   q2, d2, d26                    \n"  // R
165341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q2, q2, q15                    \n"  // +128 -> unsigned
165441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
    // V accumulator in q3 (16-bit lanes).
165541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q3, d2, d24                    \n"  // R
165641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlsl.u8   q3, d1, d28                    \n"  // G
165741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlsl.u8   q3, d0, d27                    \n"  // B
165841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q3, q3, q15                    \n"  // +128 -> unsigned
165941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
    // Saturating narrow; d0/d1 (B/G inputs) are no longer needed and are
    // overwritten with the results.
166041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q2, #8                    \n"  // 16 bit to 8 bit U
166141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q3, #8                    \n"  // 16 bit to 8 bit V
166241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
166341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
166441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
166541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
166641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
    // Flags are still those set by the subs above: loop while pix > 0.
166741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
166841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
166941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %1
167041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %2
167141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %3
167241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
  // NOTE(review): q4 is listed but no instruction above references it.
167341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
167441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
167541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
167641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOUV444ROW_NEON
167741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Computes horizontally 2:1 subsampled chroma from a row of ARGB pixels:
// each loop iteration reads 16 pixels and writes 8 U and 8 V bytes.
// Adjacent pixel pairs are summed (not averaged) per channel, and the
// coefficients are pre-halved to compensate:
//   U = sat_u8((56 * Bsum - 37 * Gsum - 19 * Rsum + 0x8080) >> 8)
//   V = sat_u8((56 * Rsum - 47 * Gsum -  9 * Bsum + 0x8080) >> 8)
// The alpha channel (q3) is loaded but never used.
//
// The loop body runs before pix is tested and pix steps by 16, so pix is
// expected to be a positive multiple of 16 (presumably guaranteed by the
// caller - confirm).
//
// NOTE(review): the body is still written in ARMv7 NEON syntax (vmov.s16,
// vld4.8, d/q register names) although this file is guarded by __aarch64__;
// presumably HAS_ARGBTOUV422ROW_NEON is not defined for AArch64 builds yet -
// confirm.
167841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
167941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOUV422ROW_NEON
168041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
168141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         int pix) {
168241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
    // Loop-invariant 16-bit coefficient magnitudes, halved because the inputs
    // below are two-pixel sums; signs are applied via vmls in the loop.
168341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
168441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
168541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
168641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
168741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
    // 0x8080 in each 16-bit lane: 128 in the high byte (the bias that survives
    // the >> 8 below) plus 0x80 in the low byte (rounding).
168841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
168941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
169041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
    // Two de-interleaving loads fill the low and high halves of q0..q3, so
    // that q0 = 16 B, q1 = 16 G, q2 = 16 R, q3 = 16 A.
169141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
169241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
169341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
169441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
169541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
    // Pairwise widening add: each 16-bit lane becomes the sum of two
    // horizontally adjacent pixels.
169641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
169741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
169841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
169941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
170041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
    // U accumulator in q8.
170141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q8, q0, q10                    \n"  // B
170241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q1, q11                    \n"  // G
170341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q2, q12                    \n"  // R
170441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
170541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
    // V accumulator in q9.
170641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q9, q2, q10                    \n"  // R
170741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q1, q14                    \n"  // G
170841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q0, q13                    \n"  // B
170941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
171041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
    // Saturating narrow; d0/d1 (the B sums) are no longer needed and are
    // overwritten with the results.
171141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
171241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
171341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
171441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
171541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
171641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
171741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
    // Flags are still those set by the subs above: loop while pix > 0.
171841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
171941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
172041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %1
172141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %2
172241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %3
172341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
172441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3",
172541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
172641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
172741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
172841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOUV422ROW_NEON
172941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
173041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
173141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOUV411ROW_NEON
173241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
173341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                         int pix) {
173441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
173541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
173641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
173741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
173841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
173941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
174041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
174141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
174241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
174341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
174441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
174541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
174641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
174741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
174841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
174941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
175041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
175141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d8, d10, d12, d14}, [%0]!     \n"  // load 8 more ARGB pixels.
175241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
175341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d9, d11, d13, d15}, [%0]!     \n"  // load last 8 ARGB pixels.
175441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q4, q4                         \n"  // B 16 bytes -> 8 shorts.
175541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q5, q5                         \n"  // G 16 bytes -> 8 shorts.
175641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q6, q6                         \n"  // R 16 bytes -> 8 shorts.
175741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
175841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadd.u16  d0, d0, d1                     \n"  // B 16 shorts -> 8 shorts.
175941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadd.u16  d1, d8, d9                     \n"  // B
176041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadd.u16  d2, d2, d3                     \n"  // G 16 shorts -> 8 shorts.
176141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadd.u16  d3, d10, d11                   \n"  // G
176241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadd.u16  d4, d4, d5                     \n"  // R 16 shorts -> 8 shorts.
176341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadd.u16  d5, d12, d13                   \n"  // R
176441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
176541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
176641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
176741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
176841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
176941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %3, %3, #32                    \n"  // 32 processed per loop.
177041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q8, q0, q10                    \n"  // B
177141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q1, q11                    \n"  // G
177241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q2, q12                    \n"  // R
177341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
177441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q9, q2, q10                    \n"  // R
177541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q1, q14                    \n"  // G
177641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q0, q13                    \n"  // B
177741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
177841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
177941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
178041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
178141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels U.
178241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
178341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%2]!                    \n"  // store 8 pixels V.
178441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
178541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
178641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %1
178741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %2
178841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %3
178941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
179041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
179141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
179241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
179341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
179441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOUV411ROW_NEON
179541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
179641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 16x2 pixels -> 8x1.  pix is the number of ARGB pixels per row, e.g. 16.
//
// RGBTOUV(QB, QG, QR) expands to the NEON instruction strings that turn three
// q registers holding eight 16-bit B, G and R values into 8 U bytes in d0 and
// 8 V bytes in d1:
//   U = (QB * q10 - QG * q11 - QR * q12 + q15) >> 8
//   V = (QR * q10 - QG * q14 - QB * q13 + q15) >> 8
// The caller must load the coefficients into q10-q14 and the bias into q15
// before using the macro. q8 and q9 are used as scratch, and d0/d1 are
// overwritten with the results. The final shift (vqshrn.u16) narrows to 8 bits
// with unsigned saturation.
179741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define RGBTOUV(QB, QG, QR) \
179841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q8, " #QB ", q10               \n"  /* B                    */ \
179941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, " #QG ", q11               \n"  /* G                    */ \
180041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, " #QR ", q12               \n"  /* R                    */ \
180141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q8, q8, q15                    \n"  /* +128 -> unsigned     */ \
180241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q9, " #QR ", q10               \n"  /* R                    */ \
180341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, " #QG ", q14               \n"  /* G                    */ \
180441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, " #QB ", q13               \n"  /* B                    */ \
180541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q9, q9, q15                    \n"  /* +128 -> unsigned     */ \
180641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q8, #8                    \n"  /* 16 bit to 8 bit U    */ \
180741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q9, #8                    \n"  /* 16 bit to 8 bit V    */
180841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
180941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
181041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOUVROW_NEON
// Converts two adjacent rows of ARGB pixels into one row of U and one row of
// V, averaging each 2x2 block of pixels.
//   src_argb         first source row.
//   src_stride_argb  offset added to src_argb to reach the second source row.
//   dst_u, dst_v     destinations; 8 bytes are stored to each per iteration.
//   pix              number of source pixels per row; 16 are consumed per
//                    iteration and the loop repeats while the count is > 0.
// Each iteration always handles a full 16 pixels per row; presumably callers
// pass pix as a multiple of 16 - TODO confirm.
// NOTE(review): src_stride_argb is an int bound with "+r" and then used as a
// pointer, and the mnemonics look like 32-bit ARM NEON although this file is
// guarded by __aarch64__ - confirm this path is only enabled where that is
// valid.
181141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
181241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      uint8* dst_u, uint8* dst_v, int pix) {
181341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
181441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_argb + src_stride (row 2)
181541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
181641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
181741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
181841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
181941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
182041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
182141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
182241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
182341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
182441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels (row 1).
182541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
182641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels (row 1).
182741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
182841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
182941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
183041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
183141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 ARGB pixels (row 2).
183241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
183341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load next 8 ARGB pixels (row 2).
183441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q0, q4                         \n"  // B: add row 2 pair sums to row 1 sums.
183541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q5                         \n"  // G: add row 2 pair sums to row 1 sums.
183641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q6                         \n"  // R: add row 2 pair sums to row 1 sums.
183741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
183841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (sum of 4 pixels / 2, rounded)
183941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
184041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
184141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
184241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
184341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q0, q1, q2)
184441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
184541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
184641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
184741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
184841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
184941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
185041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_argb),  // %1
185141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
185241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
185341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
185441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
185541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
185641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
185741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
185841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
185941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOUVROW_NEON
186041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
186141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): Subsample match C code.
186241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOUVJROW_NEON
// Converts two adjacent rows of ARGB pixels into one row of U and one row of
// V, averaging each 2x2 block of pixels. Same instruction sequence as
// ARGBToUVRow_NEON but with the 127/84/43/20/107 coefficient set loaded below.
//   src_argb         first source row.
//   src_stride_argb  offset added to src_argb to reach the second source row.
//   dst_u, dst_v     destinations; 8 bytes are stored to each per iteration.
//   pix              number of source pixels per row; 16 are consumed per
//                    iteration and the loop repeats while the count is > 0.
// Each iteration always handles a full 16 pixels per row; presumably callers
// pass pix as a multiple of 16 - TODO confirm.
// NOTE(review): src_stride_argb is an int bound with "+r" and then used as a
// pointer, and the mnemonics look like 32-bit ARM NEON although this file is
// guarded by __aarch64__ - confirm this path is only enabled where that is
// valid.
186341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
186441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       uint8* dst_u, uint8* dst_v, int pix) {
186541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
186641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_argb + src_stride (row 2)
186741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #127 / 2                  \n"  // UB / VR 0.500 coefficient
186841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #84 / 2                   \n"  // UG -0.33126 coefficient
186941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #43 / 2                   \n"  // UR -0.16874 coefficient
187041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #20 / 2                   \n"  // VB -0.08131 coefficient
187141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #107 / 2                  \n"  // VG -0.41869 coefficient
187241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
187341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
187441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
187541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
187641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels (row 1).
187741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
187841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels (row 1).
187941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
188041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
188141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
188241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
188341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 ARGB pixels (row 2).
188441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
188541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load next 8 ARGB pixels (row 2).
188641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q0, q4                         \n"  // B: add row 2 pair sums to row 1 sums.
188741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q5                         \n"  // G: add row 2 pair sums to row 1 sums.
188841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q6                         \n"  // R: add row 2 pair sums to row 1 sums.
188941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
189041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (sum of 4 pixels / 2, rounded)
189141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
189241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
189341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
189441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
189541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q0, q1, q2)
189641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
189741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
189841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
189941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
190041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
190141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
190241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_argb),  // %1
190341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
190441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
190541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
190641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
190741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
190841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
190941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
191041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
191141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBTOUVJROW_NEON
191241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
191341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_BGRATOUVROW_NEON
// Converts two adjacent rows of BGRA pixels into one row of U and one row of
// V, averaging each 2x2 block of pixels. After the de-interleaving loads the
// code treats q3 as B, q2 as G and q1 as R; the first byte of each pixel
// (q0/q4) is not used in the math.
//   src_bgra         first source row.
//   src_stride_bgra  offset added to src_bgra to reach the second source row.
//   dst_u, dst_v     destinations; 8 bytes are stored to each per iteration.
//   pix              number of source pixels per row; 16 are consumed per
//                    iteration and the loop repeats while the count is > 0.
// Each iteration always handles a full 16 pixels per row; presumably callers
// pass pix as a multiple of 16 - TODO confirm.
// NOTE(review): src_stride_bgra is an int bound with "+r" and then used as a
// pointer, and the mnemonics look like 32-bit ARM NEON although this file is
// guarded by __aarch64__ - confirm this path is only enabled where that is
// valid.
191441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
191541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      uint8* dst_u, uint8* dst_v, int pix) {
191641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
191741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_bgra + src_stride (row 2)
191841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
191941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
192041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
192141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
192241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
192341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
192441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
192541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
192641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
192741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 BGRA pixels (row 1).
192841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
192941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 BGRA pixels (row 1).
193041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q3, q3                         \n"  // B 16 bytes -> 8 shorts.
193141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // G 16 bytes -> 8 shorts.
193241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // R 16 bytes -> 8 shorts.
193341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
193441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 BGRA pixels (row 2).
193541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
193641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load next 8 BGRA pixels (row 2).
193741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q3, q7                         \n"  // B: add row 2 pair sums to row 1 sums.
193841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q6                         \n"  // G: add row 2 pair sums to row 1 sums.
193941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q5                         \n"  // R: add row 2 pair sums to row 1 sums.
194041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
194141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"  // 2x average (sum of 4 pixels / 2, rounded)
194241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
194341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q3, q3, #1                     \n"
194441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
194541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
194641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q3, q2, q1)
194741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
194841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
194941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
195041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
195141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
195241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_bgra),  // %0
195341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_bgra),  // %1
195441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
195541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
195641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
195741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
195841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
195941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
196041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
196141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
196241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_BGRATOUVROW_NEON
196341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
196441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ABGRTOUVROW_NEON
// Converts two adjacent rows of ABGR pixels into one row of U and one row of
// V, averaging each 2x2 block of pixels. After the de-interleaving loads the
// code treats q2 as B, q1 as G and q0 as R; the fourth byte of each pixel
// (q3/q7) is not used in the math.
//   src_abgr         first source row.
//   src_stride_abgr  offset added to src_abgr to reach the second source row.
//   dst_u, dst_v     destinations; 8 bytes are stored to each per iteration.
//   pix              number of source pixels per row; 16 are consumed per
//                    iteration and the loop repeats while the count is > 0.
// Each iteration always handles a full 16 pixels per row; presumably callers
// pass pix as a multiple of 16 - TODO confirm.
// NOTE(review): src_stride_abgr is an int bound with "+r" and then used as a
// pointer, and the mnemonics look like 32-bit ARM NEON although this file is
// guarded by __aarch64__ - confirm this path is only enabled where that is
// valid.
196541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
196641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      uint8* dst_u, uint8* dst_v, int pix) {
196741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
196841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_abgr + src_stride (row 2)
196941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
197041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
197141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
197241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
197341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
197441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point
197541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
197641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
197741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
197841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ABGR pixels (row 1).
197941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
198041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ABGR pixels (row 1).
198141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
198241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
198341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
198441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
198541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 ABGR pixels (row 2).
198641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
198741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load next 8 ABGR pixels (row 2).
198841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q6                         \n"  // B: add row 2 pair sums to row 1 sums.
198941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q5                         \n"  // G: add row 2 pair sums to row 1 sums.
199041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q0, q4                         \n"  // R: add row 2 pair sums to row 1 sums.
199141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
199241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average (sum of 4 pixels / 2, rounded)
199341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
199441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
199541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
199641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
199741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q2, q1, q0)
199841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
199941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
200041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
200141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
200241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
200341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_abgr),  // %0
200441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_abgr),  // %1
200541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
200641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
200741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
200841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
200941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
201041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
201141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
201241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
201341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ABGRTOUVROW_NEON
201441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
201541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGBATOUVROW_NEON
// Computes one row of 2x2-subsampled U and V from two rows of RGBA pixels.
//   src_rgba         first source row.
//   src_stride_rgba  byte offset added to src_rgba to reach the second row.
//   dst_u, dst_v     destinations; 8 bytes are stored to each per iteration.
//   pix              pixels per row; decremented by 16 per iteration.
// Each iteration de-interleaves 16 pixels from both rows, sums horizontal
// pairs and then the two rows into 16-bit lanes, halves the sums with
// rounding, and hands the B/G/R sums to the RGBTOUV macro (defined outside
// this function); the stores then take U from d0 and V from d1.
// The first de-interleaved channel (q0 / q4) is overwritten without being
// read - presumably the alpha channel.
// The loop body runs before pix is tested and repeats while the remaining
// count is > 0, so every pass handles a full 16 pixels per row; pix is
// presumably expected to be a positive multiple of 16 - TODO confirm callers.
// NOTE(review): operand %1 is bound to the int stride yet holds the second-row
// address after the first instruction; that presumes an int is as wide as a
// pointer - confirm for the intended target.
201641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
201741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                      uint8* dst_u, uint8* dst_v, int pix) {
201841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
201941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_rgba + src_stride (second row)
202041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
202141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
202241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
202341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
202441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
202541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
202641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
202741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
202841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
202941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 RGBA pixels.
203041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
203141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 RGBA pixels.
203241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q1                         \n"  // B 16 bytes -> 8 shorts.
203341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q2                         \n"  // G 16 bytes -> 8 shorts.
203441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q3                         \n"  // R 16 bytes -> 8 shorts.
203541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
203641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d8, d10, d12, d14}, [%1]!     \n"  // load 8 more RGBA pixels.
203741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
203841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d9, d11, d13, d15}, [%1]!     \n"  // load last 8 RGBA pixels.
203941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q0, q5                         \n"  // B: add second-row pair sums.
204041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q6                         \n"  // G: add second-row pair sums.
204141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q7                         \n"  // R: add second-row pair sums.
204241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
204341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
204441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
204541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
204641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
204741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 pixels per row (32 in total) per loop.
204841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q0, q1, q2)
204941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
205041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
205141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
205241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
205341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // loop while pixels remain.
205441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgba),  // %0
205541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_rgba),  // %1
205641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
205741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
205841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
205941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
206041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
206141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
206241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
206341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
206441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGBATOUVROW_NEON
206541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
206641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB24TOUVROW_NEON
// Computes one row of 2x2-subsampled U and V from two rows of RGB24 pixels.
//   src_rgb24         first source row.
//   src_stride_rgb24  byte offset added to src_rgb24 to reach the second row.
//   dst_u, dst_v      destinations; 8 bytes are stored to each per iteration.
//   pix               pixels per row; decremented by 16 per iteration.
// Each iteration de-interleaves 16 three-byte pixels from both rows, sums
// horizontal pairs and then the two rows into 16-bit lanes, halves the sums
// with rounding, and hands the B/G/R sums to the RGBTOUV macro (defined
// outside this function); the stores then take U from d0 and V from d1.
// The loop body runs before pix is tested and repeats while the remaining
// count is > 0, so every pass handles a full 16 pixels per row; pix is
// presumably expected to be a positive multiple of 16 - TODO confirm callers.
// NOTE(review): operand %1 is bound to the int stride yet holds the second-row
// address after the first instruction; that presumes an int is as wide as a
// pointer - confirm for the intended target.
206741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
206841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       uint8* dst_u, uint8* dst_v, int pix) {
206941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
207041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_rgb24 + src_stride (second row)
207141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
207241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
207341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
207441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
207541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
207641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
207741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
207841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
207941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
208041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RGB24 pixels.
208141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
208241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RGB24 pixels.
208341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
208441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
208541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
208641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
208741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RGB24 pixels.
208841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
208941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RGB24 pixels.
209041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q0, q4                         \n"  // B: add second-row pair sums.
209141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q5                         \n"  // G: add second-row pair sums.
209241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q6                         \n"  // R: add second-row pair sums.
209341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
209441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
209541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
209641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
209741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
209841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 pixels per row (32 in total) per loop.
209941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q0, q1, q2)
210041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
210141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
210241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
210341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
210441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // loop while pixels remain.
210541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgb24),  // %0
210641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_rgb24),  // %1
210741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
210841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
210941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
211041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
211141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
211241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
211341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
211441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
211541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGB24TOUVROW_NEON
211641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
211741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RAWTOUVROW_NEON
// Computes one row of 2x2-subsampled U and V from two rows of RAW pixels.
//   src_raw         first source row.
//   src_stride_raw  byte offset added to src_raw to reach the second row.
//   dst_u, dst_v    destinations; 8 bytes are stored to each per iteration.
//   pix             pixels per row; decremented by 16 per iteration.
// Each iteration de-interleaves 16 three-byte pixels from both rows, sums
// horizontal pairs and then the two rows into 16-bit lanes, and halves the
// sums with rounding. Per the labels below the sums end up as q2 = B,
// q1 = G, q0 = R, which is why the RGBTOUV macro (defined outside this
// function) is invoked as RGBTOUV(q2, q1, q0); the stores then take U from
// d0 and V from d1.
// The loop body runs before pix is tested and repeats while the remaining
// count is > 0, so every pass handles a full 16 pixels per row; pix is
// presumably expected to be a positive multiple of 16 - TODO confirm callers.
// NOTE(review): operand %1 is bound to the int stride yet holds the second-row
// address after the first instruction; that presumes an int is as wide as a
// pointer - confirm for the intended target.
211841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
211941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     uint8* dst_u, uint8* dst_v, int pix) {
212041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
212141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_raw + src_stride (second row)
212241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
212341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
212441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
212541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
212641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
212741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
212841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
212941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
213041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
213141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d0, d2, d4}, [%0]!            \n"  // load 8 RAW pixels.
213241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
213341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d1, d3, d5}, [%0]!            \n"  // load next 8 RAW pixels.
213441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q2, q2                         \n"  // B 16 bytes -> 8 shorts.
213541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
213641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  q0, q0                         \n"  // R 16 bytes -> 8 shorts.
213741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
213841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d8, d10, d12}, [%1]!          \n"  // load 8 more RAW pixels.
213941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
214041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d9, d11, d13}, [%1]!          \n"  // load last 8 RAW pixels.
214141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q2, q6                         \n"  // B: add second-row pair sums.
214241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q1, q5                         \n"  // G: add second-row pair sums.
214341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  q0, q4                         \n"  // R: add second-row pair sums.
214441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
214541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q0, q0, #1                     \n"  // 2x average
214641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q1, q1, #1                     \n"
214741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q2, q2, #1                     \n"
214841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
214941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 pixels per row (32 in total) per loop.
215041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGBTOUV(q2, q1, q0)
215141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
215241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
215341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
215441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
215541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // loop while pixels remain.
215641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_raw),  // %0
215741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_raw),  // %1
215841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
215941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
216041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
216141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
216241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
216341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
216441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
216541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
216641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RAWTOUVROW_NEON
216741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
216841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 16x2 pixels -> 8x1.  pix is number of RGB565 pixels. e.g. 16.
216941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB565TOUVROW_NEON
// Computes one row of 2x2-subsampled U and V from two rows of RGB565 pixels.
//   src_rgb565         first source row.
//   src_stride_rgb565  byte offset added to src_rgb565 to reach the second row.
//   dst_u, dst_v       destinations; 8 bytes are stored to each per iteration.
//   pix                pixels per row; decremented by 16 per iteration.
// Each 16-byte load is expanded by the RGB565TOARGB macro (defined outside
// this function); per the labels below, d0/d1/d2 then hold B/G/R bytes.
// Horizontal pairs are summed into q4 (B), q5 (G) and q6 (R), the second row
// is accumulated on top, and the sums are halved with rounding before the
// U/V multiply-accumulate, bias add and saturating narrow to 8 bits.
// The loop body runs before pix is tested and repeats while the remaining
// count is > 0, so every pass handles a full 16 pixels per row; pix is
// presumably expected to be a positive multiple of 16 - TODO confirm callers.
// NOTE(review): operand %1 is bound to the int stride yet holds the second-row
// address after the first instruction; that presumes an int is as wide as a
// pointer - confirm for the intended target.
217041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
217141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        uint8* dst_u, uint8* dst_v, int pix) {
217241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
217341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_rgb565 + src_stride (second row)
217441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
217541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
217641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
217741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
217841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
217941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
218041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
218141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
218241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
218341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
218441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB565TOARGB
218541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
218641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
218741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
218841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
218941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // next 8 RGB565 pixels.
219041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB565TOARGB
219141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
219241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
219341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
219441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
219541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
219641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%1]!                    \n"  // load 8 RGB565 pixels (second row).
219741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB565TOARGB
219841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d8, d0                         \n"  // B: add second-row pair sums.
219941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d10, d1                        \n"  // G: add second-row pair sums.
220041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d12, d2                        \n"  // R: add second-row pair sums.
220141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
220241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%1]!                    \n"  // next 8 RGB565 pixels (second row).
220341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB565TOARGB
220441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d9, d0                         \n"  // B: add second-row pair sums.
220541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d11, d1                        \n"  // G: add second-row pair sums.
220641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d13, d2                        \n"  // R: add second-row pair sums.
220741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
220841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
220941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q5, q5, #1                     \n"
221041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q6, q6, #1                     \n"
221141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
221241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
221341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q8, q4, q10                    \n"  // U  = B * UB
221441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q5, q11                    \n"  // U -= G * UG
221541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q6, q12                    \n"  // U -= R * UR
221641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
221741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q9, q6, q10                    \n"  // V  = R * VR
221841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q5, q14                    \n"  // V -= G * VG
221941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q4, q13                    \n"  // V -= B * VB
222041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
222141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
222241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
222341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
222441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
222541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
222641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
222741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // loop while pixels remain.
222841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgb565),  // %0
222941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_rgb565),  // %1
223041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
223141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
223241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
223341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
223441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
223541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
223641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
223741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
223841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGB565TOUVROW_NEON
223941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
224041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 16x2 pixels -> 8x1.  pix is number of ARGB1555 pixels. e.g. 16.
224141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB1555TOUVROW_NEON
// Computes one row of 2x2-subsampled U and V from two rows of ARGB1555
// pixels.
//   src_argb1555         first source row.
//   src_stride_argb1555  byte offset added to src_argb1555 to reach the
//                        second row.
//   dst_u, dst_v         destinations; 8 bytes are stored to each per
//                        iteration.
//   pix                  pixels per row; decremented by 16 per iteration.
// Each 16-byte load is expanded by the RGB555TOARGB macro (defined outside
// this function); per the labels below, d0/d1/d2 then hold B/G/R bytes.
// Horizontal pairs are summed into q4 (B), q5 (G) and q6 (R), the second row
// is accumulated on top, and the sums are halved with rounding before the
// U/V multiply-accumulate, bias add and saturating narrow to 8 bits.
// The loop body runs before pix is tested and repeats while the remaining
// count is > 0, so every pass handles a full 16 pixels per row; pix is
// presumably expected to be a positive multiple of 16 - TODO confirm callers.
// NOTE(review): operand %1 is bound to the int stride yet holds the second-row
// address after the first instruction; that presumes an int is as wide as a
// pointer - confirm for the intended target.
224241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
224341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                        uint8* dst_u, uint8* dst_v, int pix) {
224441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
224541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_argb1555 + src_stride (second row)
224641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
224741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
224841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
224941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
225041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
225141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5
225241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
225341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
225441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
225541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
225641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB555TOARGB
225741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
225841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
225941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
226041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
226141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB1555 pixels.
226241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB555TOARGB
226341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
226441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
226541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
226641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
226741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
226841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB1555 pixels (second row).
226941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB555TOARGB
227041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d8, d0                         \n"  // B: add second-row pair sums.
227141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d10, d1                        \n"  // G: add second-row pair sums.
227241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d12, d2                        \n"  // R: add second-row pair sums.
227341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
227441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB1555 pixels (second row).
227541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB555TOARGB
227641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d9, d0                         \n"  // B: add second-row pair sums.
227741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d11, d1                        \n"  // G: add second-row pair sums.
227841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d13, d2                        \n"  // R: add second-row pair sums.
227941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
228041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q4, q4, #1                     \n"  // 2x average
228141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q5, q5, #1                     \n"
228241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q6, q6, #1                     \n"
228341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
228441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
228541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q8, q4, q10                    \n"  // U  = B * UB
228641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q5, q11                    \n"  // U -= G * UG
228741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q6, q12                    \n"  // U -= R * UR
228841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
228941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q9, q6, q10                    \n"  // V  = R * VR
229041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q5, q14                    \n"  // V -= G * VG
229141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q4, q13                    \n"  // V -= B * VB
229241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
229341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
229441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
229541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
229641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
229741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
229841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
229941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // loop while pixels remain.
230041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb1555),  // %0
230141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_argb1555),  // %1
230241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
230341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
230441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
230541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
230641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
230741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
230841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
230941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
231041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGB1555TOUVROW_NEON
231141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
231241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 16x2 pixels -> 8x1 U and 8x1 V per loop pass.  pix is number of ARGB4444 pixels. e.g. 16.
231341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB4444TOUVROW_NEON
// Computes 2x2-subsampled U and V bytes from two rows of ARGB4444 pixels.
//
//   src_argb4444        - first source row.
//   src_stride_argb4444 - byte offset from the first row to the second row;
//                         the first instruction replaces it with the second
//                         row's address (src_argb4444 + stride).
//   dst_u, dst_v        - destinations; 8 bytes are stored to each per pass.
//   pix                 - pixel count; reduced by 16 on every pass.
//
// Each pass loads 32 bytes from each row (two 16-byte loads of 8 pixels).
// Neighbouring channel bytes are pairwise added within a row (vpaddl) and the
// second row's pairs are accumulated on top (vpadal), so each 16-bit lane
// holds the sum of a 2x2 block.  A rounding shift right by 1 turns that into
// twice the average, which is why the coefficients are loaded pre-halved.
// With B, G, R denoting those doubled averages:
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// narrowed to 8 bits with unsigned saturation.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 16 - TODO confirm against the callers.
//
// ARGB4444TOARGB is a macro defined outside this section; from the way d0, d1
// and d2 are consumed below it presumably leaves the B, G and R bytes of the
// 8 loaded pixels in d0, d1 and d2 - TODO confirm.
//
// NOTE(review): operand %1 is declared as an int but holds an address after
// the leading "add"; that is only safe where int and pointers have the same
// width.  The instructions are also written in ARMv7 NEON syntax, while the
// top of the file guards this module with defined(__aarch64__) - presumably
// HAS_ARGB4444TOUVROW_NEON is left undefined for that target; confirm.
231441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
231541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          uint8* dst_u, uint8* dst_v, int pix) {
231641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
231741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "add        %1, %0, %1                     \n"  // %1 = src_argb + src_stride (second row)
231841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q10, #112 / 2                  \n"  // UB / VR 0.875 coefficient
231941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q11, #74 / 2                   \n"  // UG -0.5781 coefficient
232041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q12, #38 / 2                   \n"  // UR -0.2969 coefficient
232141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q13, #18 / 2                   \n"  // VB -0.1406 coefficient
232241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.s16   q14, #94 / 2                   \n"  // VG -0.7344 coefficient
232341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u16   q15, #0x8080                   \n"  // 128.5 in 8.8 fixed point (bias + rounding)
232441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
232541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
    // First row: two loads of 8 pixels fill the low and high halves of
    // q4 (B), q5 (G) and q6 (R) with horizontal pair sums.
232641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
232741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
232841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB4444TOARGB
232941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d8, d0                         \n"  // B 8 bytes -> 4 shorts.
233041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d10, d1                        \n"  // G 8 bytes -> 4 shorts.
233141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d12, d2                        \n"  // R 8 bytes -> 4 shorts.
233241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
233341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // next 8 ARGB4444 pixels.
233441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB4444TOARGB
233541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d9, d0                         \n"  // B 8 bytes -> 4 shorts.
233641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d11, d1                        \n"  // G 8 bytes -> 4 shorts.
233741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpaddl.u8  d13, d2                        \n"  // R 8 bytes -> 4 shorts.
233841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
    // Second row: pair sums are accumulated onto the first row's sums.
233941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
234041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%1]!                    \n"  // load 8 ARGB4444 pixels.
234141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB4444TOARGB
234241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d8, d0                         \n"  // B: accumulate second-row pairs.
234341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d10, d1                        \n"  // G: accumulate second-row pairs.
234441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d12, d2                        \n"  // R: accumulate second-row pairs.
234541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
234641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%1]!                    \n"  // next 8 ARGB4444 pixels.
234741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB4444TOARGB
234841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d9, d0                         \n"  // B: accumulate second-row pairs.
234941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d11, d1                        \n"  // G: accumulate second-row pairs.
235041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vpadal.u8  d13, d2                        \n"  // R: accumulate second-row pairs.
235141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
235241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q4, q4, #1                     \n"  // 2x average (2x2 sum, rounded >> 1)
235341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q5, q5, #1                     \n"
235441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vrshr.u16  q6, q6, #1                     \n"
235541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
235641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %4, %4, #16                    \n"  // 16 processed per loop.
235741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q8, q4, q10                    \n"  // B
235841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q5, q11                    \n"  // G
235941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q8, q6, q12                    \n"  // R
236041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q8, q8, q15                    \n"  // +128 -> unsigned
236141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q9, q6, q10                    \n"  // R
236241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q5, q14                    \n"  // G
236341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmls.s16   q9, q4, q13                    \n"  // B
236441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vadd.u16   q9, q9, q15                    \n"  // +128 -> unsigned
236541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d0, q8, #8                    \n"  // 16 bit to 8 bit U
236641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16  d1, q9, #8                    \n"  // 16 bit to 8 bit V
236741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
236841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%2]!                    \n"  // store 8 pixels U.
236941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
237041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d1}, [%3]!                    \n"  // store 8 pixels V.
237141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
237241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb4444),  // %0
237341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_stride_argb4444),  // %1
237441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_u),     // %2
237541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_v),     // %3
237641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %4
237741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
237841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
237941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
238041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
238141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
238241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGB4444TOUVROW_NEON
238341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
238441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB565TOYROW_NEON
// Converts a row of RGB565 pixels to 8-bit luma.
//
//   src_rgb565 - source row; 16 bytes (8 pixels) are read per pass.
//   dst_y      - destination; 8 bytes are written per pass.
//   pix        - pixel count; reduced by 8 on every pass.
//
// Per pixel: Y = sat_u8((13 * B + 65 * G + 33 * R + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
//
// RGB565TOARGB is a macro defined outside this section; from the way d0, d1
// and d2 are consumed below it presumably leaves the B, G and R bytes of the
// 8 loaded pixels in d0, d1 and d2 - TODO confirm.
238541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) {
238641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
238741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
238841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
238941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
239041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d27, #16                       \n"  // Add 16 constant
239141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
239241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
239341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
239441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 RGB565 pixels.
239541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
239641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    RGB565TOARGB
239741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q2, d0, d24                    \n"  // B
239841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d1, d25                    \n"  // G
239941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d2, d26                    \n"  // R
240041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
240141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d27                        \n"  // saturating +16
240241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
240341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
240441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
240541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgb565),  // %0
240641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),       // %1
240741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)          // %2
240841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
240941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
241041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
241141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
241241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGB565TOYROW_NEON
241341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
241441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB1555TOYROW_NEON
// Converts a row of ARGB1555 pixels to 8-bit luma.
//
//   src_argb1555 - source row; 16 bytes (8 pixels) are read per pass.
//   dst_y        - destination; 8 bytes are written per pass.
//   pix          - pixel count; reduced by 8 on every pass.
//
// Per pixel: Y = sat_u8((13 * B + 65 * G + 33 * R + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
//
// ARGB1555TOARGB is a macro defined outside this section; from the way d0, d1
// and d2 are consumed below it presumably leaves the B, G and R bytes of the
// 8 loaded pixels in d0, d1 and d2 - TODO confirm.
241541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) {
241641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
241741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
241841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
241941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
242041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d27, #16                       \n"  // Add 16 constant
242141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
242241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
242341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
242441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB1555 pixels.
242541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
242641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB1555TOARGB
242741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q2, d0, d24                    \n"  // B
242841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d1, d25                    \n"  // G
242941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d2, d26                    \n"  // R
243041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
243141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d27                        \n"  // saturating +16
243241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
243341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
243441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
243541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb1555),  // %0
243641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),         // %1
243741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)            // %2
243841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
243941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
244041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
244141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
244241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGB1555TOYROW_NEON
244341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
244441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB4444TOYROW_NEON
// Converts a row of ARGB4444 pixels to 8-bit luma.
//
//   src_argb4444 - source row; 16 bytes (8 pixels) are read per pass.
//   dst_y        - destination; 8 bytes are written per pass.
//   pix          - pixel count; reduced by 8 on every pass.
//
// Per pixel: Y = sat_u8((13 * B + 65 * G + 33 * R + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
//
// ARGB4444TOARGB is a macro defined outside this section; from the way d0, d1
// and d2 are consumed below it presumably leaves the B, G and R bytes of the
// 8 loaded pixels in d0, d1 and d2 - TODO confirm.
244541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) {
244641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
244741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
244841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
244941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #33                       \n"  // R * 0.2578 coefficient
245041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d27, #16                       \n"  // Add 16 constant
245141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
245241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
245341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
245441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q0}, [%0]!                    \n"  // load 8 ARGB4444 pixels.
245541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
245641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ARGB4444TOARGB
245741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q2, d0, d24                    \n"  // B
245841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d1, d25                    \n"  // G
245941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d2, d26                    \n"  // R
246041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q2, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
246141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d27                        \n"  // saturating +16
246241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
246341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
246441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
246541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb4444),  // %0
246641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),         // %1
246741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)            // %2
246841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
246941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"
247041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
247141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
247241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGB4444TOYROW_NEON
247341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
247441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_BGRATOYROW_NEON
// Converts a row of 4-byte BGRA pixels to 8-bit luma.
//
//   src_bgra - source row; 32 bytes (8 pixels) are read per pass.
//   dst_y    - destination; 8 bytes are written per pass.
//   pix      - pixel count; reduced by 8 on every pass.
//
// vld4.8 de-interleaves the four bytes of each pixel into d0..d3.  Bytes 1, 2
// and 3 of a pixel are weighted as R, G and B; byte 0 (d0, presumably alpha)
// is not used and d0 is then reused for the result.
//
// Per pixel: Y = sat_u8((33 * R + 65 * G + 13 * B + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
247541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) {
247641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
247741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
247841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
247941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
248041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d7, #16                        \n"  // Add 16 constant
248141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
248241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
248341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
248441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of BGRA.
248541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
248641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q8, d1, d4                     \n"  // R
248741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d2, d5                     \n"  // G
248841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d3, d6                     \n"  // B
248941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
249041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d7                         \n"  // saturating +16
249141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
249241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
249341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
249441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_bgra),  // %0
249541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),     // %1
249641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
249741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
249841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
249941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
250041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
250141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_BGRATOYROW_NEON
250241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
250341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ABGRTOYROW_NEON
// Converts a row of 4-byte ABGR pixels to 8-bit luma.
//
//   src_abgr - source row; 32 bytes (8 pixels) are read per pass.
//   dst_y    - destination; 8 bytes are written per pass.
//   pix      - pixel count; reduced by 8 on every pass.
//
// vld4.8 de-interleaves the four bytes of each pixel into d0..d3.  Bytes 0, 1
// and 2 of a pixel are weighted as R, G and B; byte 3 (d3, presumably alpha)
// is not used.
//
// Per pixel: Y = sat_u8((33 * R + 65 * G + 13 * B + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
250441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) {
250541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
250641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d4, #33                        \n"  // R * 0.2578 coefficient
250741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
250841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d6, #13                        \n"  // B * 0.1016 coefficient
250941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d7, #16                        \n"  // Add 16 constant
251041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
251141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
251241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
251341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ABGR.
251441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
251541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q8, d0, d4                     \n"  // R
251641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d1, d5                     \n"  // G
251741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d2, d6                     \n"  // B
251841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
251941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d7                         \n"  // saturating +16
252041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
252141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
252241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
252341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_abgr),  // %0
252441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),  // %1
252541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
252641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
252741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
252841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
252941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
253041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ABGRTOYROW_NEON
253141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
253241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGBATOYROW_NEON
// Converts a row of 4-byte RGBA pixels to 8-bit luma.
//
//   src_rgba - source row; 32 bytes (8 pixels) are read per pass.
//   dst_y    - destination; 8 bytes are written per pass.
//   pix      - pixel count; reduced by 8 on every pass.
//
// vld4.8 de-interleaves the four bytes of each pixel into d0..d3.  Bytes 1, 2
// and 3 of a pixel are weighted as B, G and R; byte 0 (d0, presumably alpha)
// is not used and d0 is then reused for the result.
//
// Per pixel: Y = sat_u8((13 * B + 65 * G + 33 * R + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
253341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) {
253441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
253541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
253641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
253741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
253841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d7, #16                        \n"  // Add 16 constant
253941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
254041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
254141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
254241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of RGBA.
254341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
254441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q8, d1, d4                     \n"  // B
254541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d2, d5                     \n"  // G
254641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d3, d6                     \n"  // R
254741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
254841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d7                         \n"  // saturating +16
254941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
255041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
255141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
255241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgba),  // %0
255341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),  // %1
255441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
255541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
255641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
255741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
255841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
255941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGBATOYROW_NEON
256041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
256141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB24TOYROW_NEON
// Converts a row of 3-byte RGB24 pixels to 8-bit luma.
//
//   src_rgb24 - source row; 24 bytes (8 pixels) are read per pass.
//   dst_y     - destination; 8 bytes are written per pass.
//   pix       - pixel count; reduced by 8 on every pass.
//
// vld3.8 de-interleaves the three bytes of each pixel into d0..d2; bytes 0, 1
// and 2 of a pixel are weighted as B, G and R.  d3 appears in the clobber
// list although no instruction here touches it.
//
// Per pixel: Y = sat_u8((13 * B + 65 * G + 33 * R + 64) >> 7), followed by a
// saturating add of 16.
//
// The remaining count is only tested after the body has run (subs ... bgt),
// so one full pass always executes; presumably callers pass a positive
// multiple of 8 - TODO confirm against the callers.
256241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) {
256341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
256441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d4, #13                        \n"  // B * 0.1016 coefficient
256541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d5, #65                        \n"  // G * 0.5078 coefficient
256641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d6, #33                        \n"  // R * 0.2578 coefficient
256741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d7, #16                        \n"  // Add 16 constant
256841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
256941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
257041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
257141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld3.8     {d0, d1, d2}, [%0]!            \n"  // load 8 pixels of RGB24.
257241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
257341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q8, d0, d4                     \n"  // B
257441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d1, d5                     \n"  // G
257541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d2, d6                     \n"  // R
257641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q8, #7                   \n"  // 16 bit to 8 bit Y (rounded >> 7, saturated)
257741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.u8   d0, d7                         \n"  // saturating +16
257841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
257941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst1.8     {d0}, [%1]!                    \n"  // store 8 pixels Y.
258041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"  // repeat while the count set by subs is > 0
258141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_rgb24),  // %0
258241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_y),  // %1
258341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(pix)        // %2
258441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
258541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
258641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
258741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
258841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_RGB24TOYROW_NEON
258941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
#ifdef HAS_RAWTOYROW_NEON
// Convert a row of packed 3-byte RAW pixels to 8-bit luma (Y).
//
//   src_raw  source pixels, 3 bytes each. The first byte of every pixel is
//            weighted with the R coefficient, the second with G and the
//            third with B (the reverse of RGB24ToYRow_NEON's weighting).
//   dst_y    destination, one Y byte per pixel.
//   pix      number of pixels to convert.
//
// Per pixel:
//   Y = sat_u8(sat_u8((33 * R + 65 * G + 13 * B + 64) >> 7) + 16)
//
// The counter is tested after the store, so the loop body always runs at
// least once and works in steps of 8 pixels: a pix that is not a positive
// multiple of 8 is effectively rounded up (minimum 8), and that many pixels
// are read from src_raw and written to dst_y.
//
// NOTE(review): the mnemonics and d/q register names below are 32-bit ARM
// NEON syntax, while this file's NEON section appears to be guarded by
// __aarch64__. Presumably HAS_RAWTOYROW_NEON is left undefined for AArch64
// until this routine is ported - confirm in libyuv/row.h.
void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
  asm volatile (
    // Constants, replicated into all 8 byte lanes.
    "vmov.u8    d4, #33                        \n"  // R coefficient: 33/128 = 0.2578
    "vmov.u8    d5, #65                        \n"  // G coefficient: 65/128 = 0.5078
    "vmov.u8    d6, #13                        \n"  // B coefficient: 13/128 = 0.1016
    "vmov.u8    d7, #16                        \n"  // luma offset added at the end
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // De-interleaving load: d0 = 8 x R, d1 = 8 x G, d2 = 8 x B; %0 advances.
    "vld3.8     {d0, d1, d2}, [%0]!            \n"
    "subs       %2, %2, #8                     \n"  // pix -= 8; flags used by bgt below
    "vmull.u8   q8, d0, d4                     \n"  // q8  = R * 33 (widened to 16 bit)
    "vmlal.u8   q8, d1, d5                     \n"  // q8 += G * 65
    "vmlal.u8   q8, d2, d6                     \n"  // q8 += B * 13
    "vqrshrun.s16 d0, q8, #7                   \n"  // rounding >> 7, saturate to u8
    "vqadd.u8   d0, d7                         \n"  // saturating + 16
    MEMACCESS(1)
    "vst1.8     {d0}, [%1]!                    \n"  // store 8 Y bytes; %1 advances
    "bgt        1b                             \n"  // loop while pixels remain
  : "+r"(src_raw),  // %0
    "+r"(dst_y),  // %1
    "+r"(pix)        // %2
  :
  : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
  );
}
#endif  // HAS_RAWTOYROW_NEON
261841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Bilinear filter 16x2 -> 16x1
#ifdef HAS_INTERPOLATEROW_NEON
// Blend two vertically adjacent rows of bytes into one output row.
//
//   dst_ptr            destination row.
//   src_ptr            first source row; the second source row is read from
//                      src_ptr + src_stride.
//   src_stride         byte distance from the first to the second row.
//   dst_width          number of bytes to produce.
//   source_y_fraction  weight of the second row in 1/256 units.
//
// Dispatch on source_y_fraction:
//     0   -> copy the first row unchanged (the second row is never read).
//    64   -> two rounding averages, approximately 75% first / 25% second.
//   128   -> one rounding average, 50% / 50%.
//   192   -> two rounding averages, approximately 25% first / 75% second.
//   else  -> dst = (row0 * (256 - f) + row1 * f + 128) >> 8, where only the
//            low 8 bits of f and of (256 - f) are used as multipliers.
//
// Every loop tests the counter after its store, so it always runs at least
// once and works in steps of 16 bytes: a dst_width that is not a positive
// multiple of 16 is effectively rounded up (minimum 16).
//
// NOTE(review): the mnemonics and d/q register names below are 32-bit ARM
// NEON syntax, while this file's NEON section appears to be guarded by
// __aarch64__. Presumably HAS_INTERPOLATEROW_NEON is left undefined for
// AArch64 until this routine is ported - confirm in libyuv/row.h.
void InterpolateRow_NEON(uint8* dst_ptr,
                         const uint8* src_ptr, ptrdiff_t src_stride,
                         int dst_width, int source_y_fraction) {
  asm volatile (
    "cmp        %4, #0                         \n"
    "beq        100f                           \n"  // fraction 0: plain copy
    // From here on %2 no longer holds the stride: it becomes the pointer to
    // the second source row (src_ptr + src_stride).
    "add        %2, %1                         \n"
    "cmp        %4, #64                        \n"
    "beq        75f                            \n"
    "cmp        %4, #128                       \n"
    "beq        50f                            \n"
    "cmp        %4, #192                       \n"
    "beq        25f                            \n"

    "vdup.8     d5, %4                         \n"  // d5 = f (second-row weight)
    "rsb        %4, #256                       \n"  // %4 = 256 - f
    "vdup.8     d4, %4                         \n"  // d4 = 256 - f (first-row weight)
    // General purpose row blend.
  "1:                                          \n"
    MEMACCESS(1)
    "vld1.8     {q0}, [%1]!                    \n"  // 16 bytes of first row
    MEMACCESS(2)
    "vld1.8     {q1}, [%2]!                    \n"  // 16 bytes of second row
    "subs       %3, %3, #16                    \n"  // flags used by bgt below
    "vmull.u8   q13, d0, d4                    \n"  // row0 low half  * (256 - f)
    "vmull.u8   q14, d1, d4                    \n"  // row0 high half * (256 - f)
    "vmlal.u8   q13, d2, d5                    \n"  // += row1 low half  * f
    "vmlal.u8   q14, d3, d5                    \n"  // += row1 high half * f
    "vrshrn.u16 d0, q13, #8                    \n"  // rounding >> 8, narrow to u8
    "vrshrn.u16 d1, q14, #8                    \n"
    MEMACCESS(0)
    "vst1.8     {q0}, [%0]!                    \n"
    "bgt        1b                             \n"
    "b          99f                            \n"

    // Blend 25 / 75.
    // q0 = avg(avg(row0, row1), row1): row1 dominates.
  "25:                                         \n"
    MEMACCESS(1)
    "vld1.8     {q0}, [%1]!                    \n"  // q0 = first row
    MEMACCESS(2)
    "vld1.8     {q1}, [%2]!                    \n"  // q1 = second row
    "subs       %3, %3, #16                    \n"
    "vrhadd.u8  q0, q1                         \n"
    "vrhadd.u8  q0, q1                         \n"
    MEMACCESS(0)
    "vst1.8     {q0}, [%0]!                    \n"
    "bgt        25b                            \n"
    "b          99f                            \n"

    // Blend 50 / 50.
  "50:                                         \n"
    MEMACCESS(1)
    "vld1.8     {q0}, [%1]!                    \n"
    MEMACCESS(2)
    "vld1.8     {q1}, [%2]!                    \n"
    "subs       %3, %3, #16                    \n"
    "vrhadd.u8  q0, q1                         \n"  // rounding average of both rows
    MEMACCESS(0)
    "vst1.8     {q0}, [%0]!                    \n"
    "bgt        50b                            \n"
    "b          99f                            \n"

    // Blend 75 / 25.
    // Registers are swapped relative to the 25 / 75 case: q1 holds the first
    // row, so q0 = avg(avg(row1, row0), row0) and row0 dominates.
  "75:                                         \n"
    MEMACCESS(1)
    "vld1.8     {q1}, [%1]!                    \n"  // q1 = first row
    MEMACCESS(2)
    "vld1.8     {q0}, [%2]!                    \n"  // q0 = second row
    "subs       %3, %3, #16                    \n"
    "vrhadd.u8  q0, q1                         \n"
    "vrhadd.u8  q0, q1                         \n"
    MEMACCESS(0)
    "vst1.8     {q0}, [%0]!                    \n"
    "bgt        75b                            \n"
    "b          99f                            \n"

    // Blend 100 / 0 - Copy row unchanged.
    // Reached before the "add" above, so %2 is still the raw stride and is
    // not used on this path.
  "100:                                        \n"
    MEMACCESS(1)
    "vld1.8     {q0}, [%1]!                    \n"
    "subs       %3, %3, #16                    \n"
    MEMACCESS(0)
    "vst1.8     {q0}, [%0]!                    \n"
    "bgt        100b                           \n"

  "99:                                         \n"
  : "+r"(dst_ptr),          // %0
    "+r"(src_ptr),          // %1
    "+r"(src_stride),       // %2
    "+r"(dst_width),        // %3
    "+r"(source_y_fraction) // %4
  :
  : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
  );
}
#endif  // HAS_INTERPOLATEROW_NEON
271741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
#ifdef HAS_ARGBBLENDROW_NEON
// Alpha-blend a row of 4-byte pixels from src_argb0 over src_argb1.
//
//   src_argb0  source whose fourth byte per pixel is used as the alpha (a);
//              referred to as "s" in the per-line comments.
//   src_argb1  source that is scaled down by that alpha; referred to as "d".
//   dst_argb   destination row.
//   width      number of pixels to blend.
//
// For each of the first three bytes c of a pixel:
//   dst.c = sat_u8(s.c + sat_u8(d.c - ((d.c * a + 128) >> 8)))
// The fourth byte of every output pixel is set to 255.
//
// Unlike the fixed-step row functions, any width is handled: whole groups of
// 8 pixels go through the "8:" loop and the remaining 0..7 pixels through the
// single-pixel "1:" loop. A width of 0 or less stores nothing.
//
// NOTE(review): the mnemonics and d/q register names below are 32-bit ARM
// NEON syntax, while this file's NEON section appears to be guarded by
// __aarch64__. Presumably HAS_ARGBBLENDROW_NEON is left undefined for AArch64
// until this routine is ported - confirm in libyuv/row.h.
void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
  asm volatile (
    // Bias the counter by -8 so "bge" means "another full group of 8 exists".
    "subs       %3, #8                         \n"
    "blt        89f                            \n"
    // Blend 8 pixels.
  "8:                                          \n"
    MEMACCESS(0)
    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB0.
    MEMACCESS(1)
    "vld4.8     {d4, d5, d6, d7}, [%1]!        \n"  // load 8 pixels of ARGB1.
    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
    "vmull.u8   q10, d4, d3                    \n"  // db * a
    "vmull.u8   q11, d5, d3                    \n"  // dg * a
    "vmull.u8   q12, d6, d3                    \n"  // dr * a
    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
    // d22 is the low half of q11; q11 has already been consumed above.
    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
    "vqadd.u8   d2, d2, d6                     \n"  // + sr
    "vmov.u8    d3, #255                       \n"  // a = 255
    MEMACCESS(2)
    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"  // store 8 pixels of ARGB.
    "bge        8b                             \n"

  "89:                                         \n"
    // Undo the -8 bias and apply a -1 bias, so "bge" in the loop below means
    // "another single pixel exists".
    "adds       %3, #8-1                       \n"
    "blt        99f                            \n"

    // Blend 1 pixels.
    // Only lane 0 of each register is loaded and stored; the arithmetic still
    // runs on all lanes, whose other contents are ignored.
  "1:                                          \n"
    MEMACCESS(0)
    "vld4.8     {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n"  // load 1 pixel ARGB0.
    MEMACCESS(1)
    "vld4.8     {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n"  // load 1 pixel ARGB1.
    "subs       %3, %3, #1                     \n"  // 1 processed per loop.
    "vmull.u8   q10, d4, d3                    \n"  // db * a
    "vmull.u8   q11, d5, d3                    \n"  // dg * a
    "vmull.u8   q12, d6, d3                    \n"  // dr * a
    "vqrshrn.u16 d20, q10, #8                  \n"  // db >>= 8
    "vqrshrn.u16 d21, q11, #8                  \n"  // dg >>= 8
    "vqrshrn.u16 d22, q12, #8                  \n"  // dr >>= 8
    "vqsub.u8   q2, q2, q10                    \n"  // dbg - dbg * a / 256
    "vqsub.u8   d6, d6, d22                    \n"  // dr - dr * a / 256
    "vqadd.u8   q0, q0, q2                     \n"  // + sbg
    "vqadd.u8   d2, d2, d6                     \n"  // + sr
    "vmov.u8    d3, #255                       \n"  // a = 255
    MEMACCESS(2)
    "vst4.8     {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n"  // store 1 pixel.
    "bge        1b                             \n"

  "99:                                         \n"

  : "+r"(src_argb0),    // %0
    "+r"(src_argb1),    // %1
    "+r"(dst_argb),     // %2
    "+r"(width)         // %3
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"
  );
}
#endif  // HAS_ARGBBLENDROW_NEON
278441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Attenuate 8 pixels at a time.
#ifdef HAS_ARGBATTENUATEROW_NEON
// Multiply the first three bytes of every 4-byte pixel by that pixel's fourth
// byte (the alpha, a).
//
//   src_argb  source row.
//   dst_argb  destination row.
//   width     number of pixels to process.
//
// For each of the first three bytes c of a pixel:
//   dst.c = (c * a + 128) >> 8
// The fourth byte is stored exactly as it was loaded.
//
// The counter is tested after the store, so the loop body always runs at
// least once and works in steps of 8 pixels: a width that is not a positive
// multiple of 8 is effectively rounded up (minimum 8).
//
// NOTE(review): the mnemonics and d/q register names below are 32-bit ARM
// NEON syntax, while this file's NEON section appears to be guarded by
// __aarch64__. Presumably HAS_ARGBATTENUATEROW_NEON is left undefined for
// AArch64 until this routine is ported - confirm in libyuv/row.h.
void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
  asm volatile (
    // Attenuate 8 pixels.
  "1:                                          \n"
    MEMACCESS(0)
    // De-interleaving load: d0, d1, d2 = the three colour bytes, d3 = alpha.
    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ARGB.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    "vmull.u8   q10, d0, d3                    \n"  // b * a
    "vmull.u8   q11, d1, d3                    \n"  // g * a
    "vmull.u8   q12, d2, d3                    \n"  // r * a
    "vqrshrn.u16 d0, q10, #8                   \n"  // b = rounding (b * a) >> 8
    "vqrshrn.u16 d1, q11, #8                   \n"  // g = rounding (g * a) >> 8
    "vqrshrn.u16 d2, q12, #8                   \n"  // r = rounding (r * a) >> 8
    MEMACCESS(1)
    // d3 is untouched since the load, so alpha passes through unchanged.
    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(src_argb),   // %0
    "+r"(dst_argb),   // %1
    "+r"(width)       // %2
  :
  : "cc", "memory", "q0", "q1", "q10", "q11", "q12"
  );
}
#endif  // HAS_ARGBATTENUATEROW_NEON
281141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Quantize 8 ARGB pixels (32 bytes).
// dst = (dst * scale >> 16) * interval_size + interval_offset;
#ifdef HAS_ARGBQUANTIZEROW_NEON
// Quantize the first three bytes of every 4-byte pixel, in place.
//
//   dst_argb         row that is both read and overwritten.
//   scale            multiplier applied before the >> 16.
//   interval_size    multiplier applied after the >> 16.
//   interval_offset  value added last.
//   width            number of pixels to process.
//
// Only the low 16 bits of scale, interval_size and interval_offset take part:
// each is replicated into 16-bit lanes. The final result is narrowed to 8 bits
// with unsigned saturation. The fourth byte of every pixel is written back
// exactly as it was loaded.
//
// The counter is tested after the store, so the loop body always runs at
// least once and works in steps of 8 pixels: a width that is not a positive
// multiple of 8 is effectively rounded up (minimum 8).
//
// NOTE(review): the mnemonics and d/q register names below are 32-bit ARM
// NEON syntax, while this file's NEON section appears to be guarded by
// __aarch64__. Presumably HAS_ARGBQUANTIZEROW_NEON is left undefined for
// AArch64 until this routine is ported - confirm in libyuv/row.h.
void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
                          int interval_offset, int width) {
  asm volatile (
    "vdup.u16   q8, %2                         \n"
    // Halved because vqdmulh below doubles the product before taking the
    // high half, giving (value * scale) >> 16 overall.
    "vshr.u16   q8, q8, #1                     \n"  // scale >>= 1
    "vdup.u16   q9, %3                         \n"  // interval multiply.
    "vdup.u16   q10, %4                        \n"  // interval add

    // 8 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // No write-back on the load: the same address is stored to below.
    // Even-numbered d registers are used so each channel can be widened in
    // place into its own q register (q0 = d0:d1, q1 = d2:d3, q2 = d4:d5).
    "vld4.8     {d0, d2, d4, d6}, [%0]         \n"  // load 8 pixels of ARGB.
    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
    "vmovl.u8   q0, d0                         \n"  // b (0 .. 255)
    "vmovl.u8   q1, d2                         \n"  // g widened to 16 bit
    "vmovl.u8   q2, d4                         \n"  // r widened to 16 bit
    "vqdmulh.s16 q0, q0, q8                    \n"  // b * scale
    "vqdmulh.s16 q1, q1, q8                    \n"  // g
    "vqdmulh.s16 q2, q2, q8                    \n"  // r
    "vmul.u16   q0, q0, q9                     \n"  // b * interval_size
    "vmul.u16   q1, q1, q9                     \n"  // g
    "vmul.u16   q2, q2, q9                     \n"  // r
    "vadd.u16   q0, q0, q10                    \n"  // b + interval_offset
    "vadd.u16   q1, q1, q10                    \n"  // g
    "vadd.u16   q2, q2, q10                    \n"  // r
    "vqmovn.u16 d0, q0                         \n"  // saturating narrow back to u8
    "vqmovn.u16 d2, q1                         \n"
    "vqmovn.u16 d4, q2                         \n"
    MEMACCESS(0)
    // d6 (fourth byte of each pixel) is untouched since the load.
    "vst4.8     {d0, d2, d4, d6}, [%0]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(dst_argb),       // %0
    "+r"(width)           // %1
  : "r"(scale),           // %2
    "r"(interval_size),   // %3
    "r"(interval_offset)  // %4
  : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10"
  );
}
#endif  // HAS_ARGBQUANTIZEROW_NEON
285641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
285741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Shade 8 pixels at a time by specified value.
285841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from d0 to d7.
285941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
//
// Setup: `value` is replicated across q0, each of its bytes is doubled into a
// 16-bit lane by zipping d0 with d1, and the lanes are halved.  In the loop each
// pixel channel is widened to 16 bits, multiplied by its lane with vqrdmulh
// (doubling, rounding, high half kept) and narrowed back with saturation.
//   src_argb: source row; 32 bytes are read per loop pass.
//   dst_argb: destination row; 32 bytes are written per loop pass.
//   width:    pixel count; reduced by 8 per pass, the loop repeats while the
//             remainder is > 0 (so at least one pass always runs).
//             NOTE(review): presumably callers pass a multiple of 8 -- confirm.
//   value:    32-bit per-channel scale, one byte per channel.
// NOTE(review): the body uses ARMv7 mnemonics (vld4.8, q registers) although
// this file is guarded by __aarch64__; presumably HAS_ARGBSHADEROW_NEON is not
// yet defined for 64-bit builds -- confirm in row.h.
286041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSHADEROW_NEON
286141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
286241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                       uint32 value) {
286341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
286441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vdup.u32   q0, %3                         \n"  // duplicate scale value.
286541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vzip.u8    d0, d1                         \n"  // d0 aarrggbb.
286641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vshr.u16   q0, q0, #1                     \n"  // scale / 2.
286741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
286841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // 8 pixel loop.
286941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
287041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
287141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
287241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d20, d22, d24, d26}, [%0]!    \n"  // load 8 pixels of ARGB.
287341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
287441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q10, d20                       \n"  // b (0 .. 255)
287541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q11, d22                       \n"  // g
287641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q12, d24                       \n"  // r
287741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q13, d26                       \n"  // a
287841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrdmulh.s16 q10, q10, d0[0]              \n"  // b * scale * 2
287941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrdmulh.s16 q11, q11, d0[1]              \n"  // g
288041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrdmulh.s16 q12, q12, d0[2]              \n"  // r
288141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrdmulh.s16 q13, q13, d0[3]              \n"  // a
288241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqmovn.u16 d20, q10                       \n"  // saturate to 8 bit b
288341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqmovn.u16 d22, q11                       \n"  // g
288441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqmovn.u16 d24, q12                       \n"  // r
288541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqmovn.u16 d26, q13                       \n"  // a
288641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
288741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d20, d22, d24, d26}, [%1]!    \n"  // store 8 pixels of ARGB.
288841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
288941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),       // %0
289041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),       // %1
289141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)           // %2
289241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "r"(value)            // %3
289341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q10", "q11", "q12", "q13"
289441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
289541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
289641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBSHADEROW_NEON
289741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
289841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
289941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Similar to ARGBToYJ but stores ARGB.
290041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
//
// The weighted sum is accumulated in 16 bits, rounded and shifted right by 7,
// then written to the B, G and R channels; the alpha register (d3) is loaded
// and stored without being modified.
//   src_argb: source row; 32 bytes are read per loop pass.
//   dst_argb: destination row; 32 bytes are written per loop pass.
//   width:    pixel count; reduced by 8 per pass, the loop repeats while the
//             remainder is > 0 (so at least one pass always runs).
//             NOTE(review): presumably callers pass a multiple of 8 -- confirm.
// NOTE(review): the body uses ARMv7 mnemonics (vld4.8, q registers) although
// this file is guarded by __aarch64__; presumably HAS_ARGBGRAYROW_NEON is not
// yet defined for 64-bit builds -- confirm in row.h.
290141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBGRAYROW_NEON
290241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) {
290341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
290441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d24, #15                       \n"  // B * 0.11400 coefficient
290541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d25, #75                       \n"  // G * 0.58700 coefficient
290641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #38                       \n"  // R * 0.29900 coefficient
290741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
290841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
290941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
291041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 ARGB pixels.
291141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
291241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q2, d0, d24                    \n"  // B
291341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d1, d25                    \n"  // G
291441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d2, d26                    \n"  // R
291541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqrshrun.s16 d0, q2, #7                   \n"  // rounded 15 bit to 8 bit gray, into B
291641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov       d1, d0                         \n"  // G = gray
291741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov       d2, d0                         \n"  // R = gray
291841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
291941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 ARGB pixels.
292041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
292141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),  // %0
292241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),  // %1
292341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)      // %2
292441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
292541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q12", "q13"
292641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
292741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
292841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBGRAYROW_NEON
292941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
293041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
293141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org//    b = (r * 35 + g * 68 + b * 17) >> 7
293241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org//    g = (r * 45 + g * 88 + b * 22) >> 7
293341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org//    r = (r * 50 + g * 98 + b * 24) >> 7
//
// Operates in place: each pass loads 32 bytes from dst_argb without advancing
// the pointer, then stores the result to the same address with post-increment.
// The three sums are accumulated in 16 bits (q2, q3, q8) and narrowed with an
// unsigned saturating shift; the alpha register (d3) is stored unmodified.
//   dst_argb: row to convert in place.
//   width:    pixel count; reduced by 8 per pass, the loop repeats while the
//             remainder is > 0 (so at least one pass always runs).
//             NOTE(review): presumably callers pass a multiple of 8 -- confirm.
// Every NEON register written by the asm, including the q8 accumulator, is
// listed as clobbered so the compiler does not keep live values in them.
// NOTE(review): the body uses ARMv7 mnemonics (vld4.8, q registers) although
// this file is guarded by __aarch64__; presumably HAS_ARGBSEPIAROW_NEON is not
// yet defined for 64-bit builds -- confirm in row.h.
293441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
293541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSEPIAROW_NEON
293641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) {
293741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
293841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d20, #17                       \n"  // BB coefficient
293941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d21, #68                       \n"  // BG coefficient
294041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d22, #35                       \n"  // BR coefficient
294141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d24, #22                       \n"  // GB coefficient
294241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d25, #88                       \n"  // GG coefficient
294341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d26, #45                       \n"  // GR coefficient
294441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d28, #24                       \n"  // RB coefficient
294541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d29, #98                       \n"  // RG coefficient
294641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmov.u8    d30, #50                       \n"  // RR coefficient
294741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
294841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
294941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
295041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d0, d1, d2, d3}, [%0]         \n"  // load 8 ARGB pixels.
295141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %1, %1, #8                     \n"  // 8 processed per loop.
295241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q2, d0, d20                    \n"  // B to Sepia B
295341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d1, d21                    \n"  // G
295441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q2, d2, d22                    \n"  // R
295541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q3, d0, d24                    \n"  // B to Sepia G
295641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q3, d1, d25                    \n"  // G
295741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q3, d2, d26                    \n"  // R
295841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmull.u8   q8, d0, d28                    \n"  // B to Sepia R
295941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d1, d29                    \n"  // G
296041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmlal.u8   q8, d2, d30                    \n"  // R
296141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16 d0, q2, #7                     \n"  // 16 bit to 8 bit B
296241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16 d1, q3, #7                     \n"  // 16 bit to 8 bit G
296341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrn.u16 d2, q8, #7                     \n"  // 16 bit to 8 bit R
296441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
296541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d0, d1, d2, d3}, [%0]!        \n"  // store 8 ARGB pixels.
296641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
296741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(dst_argb),  // %0
296841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)      // %1
296941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
297041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q8",
297141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q10", "q11", "q12", "q13", "q14", "q15"
297241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
297341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
297441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBSEPIAROW_NEON
297541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
297641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Tranform 8 ARGB pixels (32 bytes) with color matrix.
297741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): Was same as Sepia except matrix is provided.  This function
297841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// needs to saturate.  Consider doing a non-saturating version.
297941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBCOLORMATRIXROW_NEON
298041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
298141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                             const int8* matrix_argb, int width) {
298241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
298341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(3)
298441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld1.8     {q2}, [%3]                     \n"  // load 3 ARGB vectors.
298541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.s8   q0, d4                         \n"  // B,G coefficients s16.
298641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.s8   q1, d5                         \n"  // R,A coefficients s16.
298741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
298841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
298941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
299041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
299141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vld4.8     {d16, d18, d20, d22}, [%0]!    \n"  // load 8 ARGB pixels.
299241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
299341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q8, d16                        \n"  // b (0 .. 255) 16 bit
299441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q9, d18                        \n"  // g
299541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q10, d20                       \n"  // r
299641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmovl.u8   q15, d22                       \n"  // a
299741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q12, q8, d0[0]                 \n"  // B = B * Matrix B
299841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q13, q8, d1[0]                 \n"  // G = B * Matrix G
299941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q14, q8, d2[0]                 \n"  // R = B * Matrix R
300041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q15, q8, d3[0]                 \n"  // A = B * Matrix A
300141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q4, q9, d0[1]                  \n"  // B += G * Matrix B
300241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q5, q9, d1[1]                  \n"  // G += G * Matrix G
300341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q6, q9, d2[1]                  \n"  // R += G * Matrix R
300441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q7, q9, d3[1]                  \n"  // A += G * Matrix A
300541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
300641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
300741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
300841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
300941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q4, q10, d0[2]                 \n"  // B += R * Matrix B
301041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q5, q10, d1[2]                 \n"  // G += R * Matrix G
301141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q6, q10, d2[2]                 \n"  // R += R * Matrix R
301241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q7, q10, d3[2]                 \n"  // A += R * Matrix A
301341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
301441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
301541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
301641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
301741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q4, q15, d0[3]                 \n"  // B += A * Matrix B
301841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q5, q15, d1[3]                 \n"  // G += A * Matrix G
301941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q6, q15, d2[3]                 \n"  // R += A * Matrix R
302041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vmul.s16   q7, q15, d3[3]                 \n"  // A += A * Matrix A
302141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q12, q12, q4                   \n"  // Accumulate B
302241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q13, q13, q5                   \n"  // Accumulate G
302341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q14, q14, q6                   \n"  // Accumulate R
302441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqadd.s16  q15, q15, q7                   \n"  // Accumulate A
302541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrun.s16 d16, q12, #6                  \n"  // 16 bit to 8 bit B
302641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrun.s16 d18, q13, #6                  \n"  // 16 bit to 8 bit G
302741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrun.s16 d20, q14, #6                  \n"  // 16 bit to 8 bit R
302841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vqshrun.s16 d22, q15, #6                  \n"  // 16 bit to 8 bit A
302941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
303041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "vst4.8     {d16, d18, d20, d22}, [%1]!    \n"  // store 8 ARGB pixels.
303141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "bgt        1b                             \n"
303241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb),   // %0
303341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),   // %1
303441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)       // %2
303541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "r"(matrix_argb)  // %3
303641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
303741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "q10", "q11", "q12", "q13", "q14", "q15"
303841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
303941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
304041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBCOLORMATRIXROW_NEON
304141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
304241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable.
304341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
//
// Each channel is computed as a widening 8x8 -> 16 bit product followed by a
// rounding shift right by 8 that narrows back to 8 bits.
//   src_argb0: first source row; 32 bytes are read per loop pass.
//   src_argb1: second source row; 32 bytes are read per loop pass.
//   dst_argb:  destination row; 32 bytes are written per loop pass.
//   width:     pixel count; reduced by 8 per pass, the loop repeats while the
//              remainder is > 0 (so at least one pass always runs).
//              NOTE(review): presumably a multiple of 8 -- confirm.
// width is a 32-bit int, so the counter is addressed through the %w operand
// modifier: the subtraction and the flags tested by b.gt then depend only on
// the low 32 bits of the register holding it.
304441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBMULTIPLYROW_NEON
304541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
304641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          uint8* dst_argb, int width) {
304741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
304841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // 8 pixel loop.
304941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
305041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
305141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
3052d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
305341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
3054d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 more ARGB pixels.
305541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
3056d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "umull      v0.8h, v0.8b, v4.8b            \n"  // multiply B
3057d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "umull      v1.8h, v1.8b, v5.8b            \n"  // multiply G
3058d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "umull      v2.8h, v2.8b, v6.8b            \n"  // multiply R
3059d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "umull      v3.8h, v3.8b, v7.8b            \n"  // multiply A
3060d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rshrn      v0.8b, v0.8h, #8               \n"  // 16 bit to 8 bit B
3061d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rshrn      v1.8b, v1.8h, #8               \n"  // 16 bit to 8 bit G
3062d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rshrn      v2.8b, v2.8h, #8               \n"  // 16 bit to 8 bit R
3063d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "rshrn      v3.8b, v3.8h, #8               \n"  // 16 bit to 8 bit A
306441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
3065d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
306641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "b.gt       1b                             \n"
306741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
306841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb0),  // %0
306941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_argb1),  // %1
307041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),   // %2
307141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)       // %3
307241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
3073d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
307441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
307541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
307641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBMULTIPLYROW_NEON
307741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
307841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Add 2 rows of ARGB pixels together, 8 pixels at a time.
//
// Each channel is an unsigned saturating 8-bit add of the two sources.
//   src_argb0: first source row; 32 bytes are read per loop pass.
//   src_argb1: second source row; 32 bytes are read per loop pass.
//   dst_argb:  destination row; 32 bytes are written per loop pass.
//   width:     pixel count; reduced by 8 per pass, the loop repeats while the
//              remainder is > 0 (so at least one pass always runs).
//              NOTE(review): presumably a multiple of 8 -- confirm.
// width is a 32-bit int, so the counter is addressed through the %w operand
// modifier: the subtraction and the flags tested by b.gt then depend only on
// the low 32 bits of the register holding it.
307941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBADDROW_NEON
308041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
308141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                     uint8* dst_argb, int width) {
308241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
308341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // 8 pixel loop.
308441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
308541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
308641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
3087d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
308841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
3089d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 more ARGB pixels.
309041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
3091d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqadd      v0.8b, v0.8b, v4.8b            \n"  // saturating add B
3092d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqadd      v1.8b, v1.8b, v5.8b            \n"  // saturating add G
3093d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqadd      v2.8b, v2.8b, v6.8b            \n"  // saturating add R
3094d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqadd      v3.8b, v3.8b, v7.8b            \n"  // saturating add A
309541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
3096d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
309741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "b.gt       1b                             \n"
309841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
309941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb0),  // %0
310041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_argb1),  // %1
310141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),   // %2
310241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)       // %3
310341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
3104d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
310541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
310641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
310741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBADDROW_NEON
310841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
310941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
//
// Each channel is src_argb0 - src_argb1 as an unsigned saturating 8-bit
// subtract.
//   src_argb0: minuend row; 32 bytes are read per loop pass.
//   src_argb1: subtrahend row; 32 bytes are read per loop pass.
//   dst_argb:  destination row; 32 bytes are written per loop pass.
//   width:     pixel count; reduced by 8 per pass, the loop repeats while the
//              remainder is > 0 (so at least one pass always runs).
//              NOTE(review): presumably a multiple of 8 -- confirm.
// width is a 32-bit int, so the counter is addressed through the %w operand
// modifier: the subtraction and the flags tested by b.gt then depend only on
// the low 32 bits of the register holding it.
311041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSUBTRACTROW_NEON
311141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
311241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org                          uint8* dst_argb, int width) {
311341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  asm volatile (
311441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    // 8 pixel loop.
311541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    ".p2align   2                              \n"
311641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  "1:                                          \n"
311741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(0)
3118d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
311941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(1)
3120d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "ld4        {v4.8b-v7.8b}, [%1], #32       \n"  // load 8 more ARGB pixels.
312141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
3122d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqsub      v0.8b, v0.8b, v4.8b            \n"  // saturating subtract B
3123d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqsub      v1.8b, v1.8b, v5.8b            \n"  // saturating subtract G
3124d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqsub      v2.8b, v2.8b, v6.8b            \n"  // saturating subtract R
3125d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "uqsub      v3.8b, v3.8b, v7.8b            \n"  // saturating subtract A
312641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    MEMACCESS(2)
3127d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
312841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "b.gt       1b                             \n"
312941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
313041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  : "+r"(src_argb0),  // %0
313141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(src_argb1),  // %1
313241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(dst_argb),   // %2
313341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org    "+r"(width)       // %3
313441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  :
3135d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
313641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org  );
313741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}
313841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // HAS_ARGBSUBTRACTROW_NEON
313941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
// A = 255
// R = Sobel
// G = Sobel
// B = Sobel
//
// Each pass of the loop reads 8 bytes from src_sobelx and 8 bytes from
// src_sobely, adds them with unsigned saturation and writes 32 bytes to
// dst_argb. The count is tested after the body, so at least 8 pixels are
// always processed and a width that is not a multiple of 8 is rounded up.
#ifdef HAS_SOBELROW_NEON
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                   uint8* dst_argb, int width) {
  asm volatile (
    "movi       v3.8b, #255                    \n"  // alpha
    // 8 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.8b}, [%0], #8              \n"  // load 8 sobelx.
    MEMACCESS(1)
    "ld1        {v1.8b}, [%1], #8              \n"  // load 8 sobely.
    // width is a 32-bit int: address it as a W register. A bare %3 names the
    // X register, whose upper 32 bits are unspecified for an int operand.
    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
    "uqadd      v0.8b, v0.8b, v1.8b            \n"  // add
    "mov        v1.8b, v0.8b                   \n"  // replicate sobel to G
    "mov        v2.8b, v0.8b                   \n"  // replicate sobel to R
    MEMACCESS(2)
    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
    "bgt        1b                             \n"
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_argb),    // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_SOBELROW_NEON
317341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Adds Sobel X and Sobel Y and stores Sobel into plane.
//
// Each pass of the loop reads 16 bytes from src_sobelx and 16 bytes from
// src_sobely, adds them with unsigned saturation and writes 16 bytes to
// dst_y. The count is tested after the body, so at least 16 pixels are
// always processed and a width that is not a multiple of 16 is rounded up.
#ifdef HAS_SOBELTOPLANEROW_NEON
void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                          uint8* dst_y, int width) {
  asm volatile (
    // 16 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], #16            \n"  // load 16 sobelx.
    MEMACCESS(1)
    "ld1        {v1.16b}, [%1], #16            \n"  // load 16 sobely.
    // width is a 32-bit int: address it as a W register. A bare %3 names the
    // X register, whose upper 32 bits are unspecified for an int operand.
    "subs       %w3, %w3, #16                  \n"  // 16 processed per loop.
    "uqadd      v0.16b, v0.16b, v1.16b         \n"  // add
    MEMACCESS(2)
    "st1        {v0.16b}, [%2], #16            \n"  // store 16 pixels.
    "bgt        1b                             \n"
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_y),       // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "v0", "v1"
  );
}
#endif  // HAS_SOBELTOPLANEROW_NEON
320041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// Mixes Sobel X, Sobel Y and Sobel into ARGB.
// A = 255
// R = Sobel X
// G = Sobel
// B = Sobel Y
//
// Per pixel the stored bytes are, in memory order: sobely, saturated
// (sobelx + sobely), sobelx, 255. Each pass of the loop handles 8 pixels.
// The count is tested after the body, so at least 8 pixels are always
// processed and a width that is not a multiple of 8 is rounded up.
#ifdef HAS_SOBELXYROW_NEON
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width) {
  asm volatile (
    "movi       v3.8b, #255                    \n"  // alpha
    // 8 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v2.8b}, [%0], #8              \n"  // load 8 sobelx.
    MEMACCESS(1)
    "ld1        {v0.8b}, [%1], #8              \n"  // load 8 sobely.
    // width is a 32-bit int: address it as a W register. A bare %3 names the
    // X register, whose upper 32 bits are unspecified for an int operand.
    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
    "uqadd      v1.8b, v0.8b, v2.8b            \n"  // add
    MEMACCESS(2)
    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
    "bgt        1b                             \n"
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_argb),    // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_SOBELXYROW_NEON
323241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// SobelX as a matrix is
// -1  0  1
// -2  0  2
// -1  0  1
//
// For each output pixel i this stores
//   min(255, |(y0[i] - y0[i+2]) + 2 * (y1[i] - y1[i+2]) + (y2[i] - y2[i+2])|)
// into dst_sobelx[i]. Each pass of the loop produces 8 outputs and reads
// bytes i..i+9 of every source row (two overlapping 8-byte loads at offsets
// 0 and 2). The count is tested after the body, so at least 8 pixels are
// always processed and a width that is not a multiple of 8 is rounded up.
#ifdef HAS_SOBELXROW_NEON
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobelx, int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.8b}, [%0],%5               \n"  // top
    MEMACCESS(0)
    "ld1        {v1.8b}, [%0],%6               \n"
    "usubl      v0.8h, v0.8b, v1.8b            \n"
    MEMACCESS(1)
    "ld1        {v2.8b}, [%1],%5               \n"  // center * 2
    MEMACCESS(1)
    "ld1        {v3.8b}, [%1],%6               \n"
    "usubl      v1.8h, v2.8b, v3.8b            \n"
    "add        v0.8h, v0.8h, v1.8h            \n"
    "add        v0.8h, v0.8h, v1.8h            \n"
    MEMACCESS(2)
    "ld1        {v2.8b}, [%2],%5               \n"  // bottom
    MEMACCESS(2)
    "ld1        {v3.8b}, [%2],%6               \n"
    // width is a 32-bit int: address it as a W register. A bare %4 names the
    // X register, whose upper 32 bits are unspecified for an int operand.
    "subs       %w4, %w4, #8                   \n"  // 8 pixels
    "usubl      v1.8h, v2.8b, v3.8b            \n"
    "add        v0.8h, v0.8h, v1.8h            \n"
    "abs        v0.8h, v0.8h                   \n"
    "uqxtn      v0.8b, v0.8h                   \n"
    MEMACCESS(3)
    "st1        {v0.8b}, [%3], #8              \n"  // store 8 sobelx
    "bgt        1b                             \n"
  : "+r"(src_y0),      // %0
    "+r"(src_y1),      // %1
    "+r"(src_y2),      // %2
    "+r"(dst_sobelx),  // %3
    "+r"(width)        // %4
  // The post-index increment of ld1 is an X register, so the step constants
  // are passed as 64-bit values; together they advance each row by 8 bytes.
  : "r"(2LL),          // %5
    "r"(6LL)           // %6
  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
  );
}
#endif  // HAS_SOBELXROW_NEON
327841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
// SobelY as a matrix is
// -1 -2 -1
//  0  0  0
//  1  2  1
//
// For each output pixel i this stores
//   min(255, |(y0[i] - y1[i]) + 2 * (y0[i+1] - y1[i+1]) + (y0[i+2] - y1[i+2])|)
// into dst_sobely[i]. Each pass of the loop produces 8 outputs and reads
// bytes i..i+9 of both source rows (three overlapping 8-byte loads at
// offsets 0, 1 and 2). The count is tested after the body, so at least 8
// pixels are always processed and a width that is not a multiple of 8 is
// rounded up.
#ifdef HAS_SOBELYROW_NEON
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
                    uint8* dst_sobely, int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.8b}, [%0],%4               \n"  // left
    MEMACCESS(1)
    "ld1        {v1.8b}, [%1],%4               \n"
    "usubl      v0.8h, v0.8b, v1.8b            \n"
    MEMACCESS(0)
    "ld1        {v2.8b}, [%0],%4               \n"  // center * 2
    MEMACCESS(1)
    "ld1        {v3.8b}, [%1],%4               \n"
    "usubl      v1.8h, v2.8b, v3.8b            \n"
    "add        v0.8h, v0.8h, v1.8h            \n"
    "add        v0.8h, v0.8h, v1.8h            \n"
    MEMACCESS(0)
    "ld1        {v2.8b}, [%0],%5               \n"  // right
    MEMACCESS(1)
    "ld1        {v3.8b}, [%1],%5               \n"
    // width is a 32-bit int: address it as a W register. A bare %3 names the
    // X register, whose upper 32 bits are unspecified for an int operand.
    "subs       %w3, %w3, #8                   \n"  // 8 pixels
    "usubl      v1.8h, v2.8b, v3.8b            \n"
    "add        v0.8h, v0.8h, v1.8h            \n"
    "abs        v0.8h, v0.8h                   \n"
    "uqxtn      v0.8b, v0.8h                   \n"
    MEMACCESS(2)
    "st1        {v0.8b}, [%2], #8              \n"  // store 8 sobely
    "bgt        1b                             \n"
  : "+r"(src_y0),      // %0
    "+r"(src_y1),      // %1
    "+r"(dst_sobely),  // %2
    "+r"(width)        // %3
  // The post-index increment of ld1 is an X register, so the step constants
  // are passed as 64-bit values; 1 + 1 + 6 advances each row by 8 bytes.
  : "r"(1LL),          // %4
    "r"(6LL)           // %5
  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
  );
}
#endif  // HAS_SOBELYROW_NEON
332341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
332441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org
332541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef __cplusplus
332641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}  // extern "C"
332741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org}  // namespace libyuv
332841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif
3329