164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com/*
264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com *
464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com *  Use of this source code is governed by a BSD-style license
564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com *  that can be found in the LICENSE file in the root of the source
664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com *  tree. An additional intellectual property rights grant can be found
7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com *  in the file PATENTS. All contributing project authors may
864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com *  be found in the AUTHORS file in the root of the source tree.
964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com */
1064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
1164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#include "libyuv/row.h"
1264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
1364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#ifdef __cplusplus
1464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comnamespace libyuv {
1564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comextern "C" {
1664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif
1764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
18dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// This module is for GCC Neon.
1937ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
2037ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com  !defined(__native_client__)
2164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
2283a63e65a6bcbf59f7dc617d0c7275d50625884cfbarchard@google.com// NEON downscalers with interpolation.
2383a63e65a6bcbf59f7dc617d0c7275d50625884cfbarchard@google.com// Provided by Fritz Koenig
2464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
2506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// Read 32x1 throw away even pixels, and write 16x1.
261f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
2764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                        uint8* dst, int dst_width) {
2864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
29c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
3031d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
3164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // load even pixels into q0, odd into q1
322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld2.8     {q0, q1}, [%0]!                \n"
3364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "subs       %2, %2, #16                    \n"  // 16 processed per loop
342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
3564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
368e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
378e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst),              // %1
388e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width)         // %2
398e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
408e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "q0", "q1"              // Clobber List
4164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
4264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
4364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
4406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// Read 32x2 average down and write 16x1.
458ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
4664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                           uint8* dst, int dst_width) {
4764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
4864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // change the stride to row 2 pointer
4964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "add        %1, %0                         \n"
50c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
5131d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
522c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0, q1}, [%0]!                \n"  // load row 1 and post inc
532c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q2, q3}, [%1]!                \n"  // load row 2 and post inc
548e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "subs       %3, %3, #16                    \n"  // 16 processed per loop
5564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // row 1 add adjacent
5664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"
5764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpadal.u8  q0, q2                         \n"  // row 2 add adjacent + row1
5864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpadal.u8  q1, q3                         \n"
5964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
6064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrshrn.u16 d1, q1, #2                     \n"
612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"
6264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
638e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
648e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_stride),       // %1
658e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst),              // %2
668e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width)         // %3
678e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "q0", "q1", "q2", "q3"     // Clobber List
698e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  );
7064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
7164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
721f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
7364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                        uint8* dst_ptr, int dst_width) {
7464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
75c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
7631d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
77c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n" // src line 0
78c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "subs       %2, %2, #8                     \n" // 8 processed per loop
79c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vst1.8     {d2}, [%1]!                    \n"
80c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "bgt        1b                             \n"
818e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
828e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
838e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width)         // %2
848e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
858e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "q0", "q1", "memory", "cc"
8664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
8764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
8864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
898ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
9064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                           uint8* dst_ptr, int dst_width) {
9164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
9264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "add        r4, %0, %3                     \n"
9364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "add        r5, r4, %3                     \n"
9464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "add        %3, r5, %3                     \n"
95c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
9631d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
972c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q0}, [%0]!                    \n"   // load up 16x4
982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q1}, [r4]!                    \n"
992c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q2}, [r5]!                    \n"
1002c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8     {q3}, [%3]!                    \n"
1018ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs       %2, %2, #4                     \n"
10264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"
10364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpadal.u8  q0, q1                         \n"
10464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpadal.u8  q0, q2                         \n"
10564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpadal.u8  q0, q3                         \n"
10664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u16 q0, q0                         \n"
10764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrshrn.u32 d0, q0, #4                     \n"   // divide by 16 w/rounding
10864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovn.u16  d0, q0                         \n"
1092c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.32    {d0[0]}, [%1]!                 \n"
11064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt        1b                             \n"
1118e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
1128e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
1138e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width)         // %2
1148e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r"(src_stride)         // %3
1158e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc"
11664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
11764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
11864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
11964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// Down scale from 4 to 3 pixels. Use the neon multilane read/write
12064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// to load up the every 4th pixel into a 4 different registers.
12164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// Point samples 32 pixels to 24 pixels.
12264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleRowDown34_NEON(const uint8* src_ptr,
1231f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com                         ptrdiff_t src_stride,
12464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                         uint8* dst_ptr, int dst_width) {
12564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
126c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
12731d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
128c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld4.8     {d0, d1, d2, d3}, [%0]!      \n" // src line 0
129c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "subs       %2, %2, #24                  \n"
130c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vmov       d2, d3                       \n" // order d0, d1, d2
131c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vst3.8     {d0, d1, d2}, [%1]!          \n"
132c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "bgt        1b                           \n"
1338e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
1348e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
1358e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width)         // %2
1368e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
1378e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "d0", "d1", "d2", "d3", "memory", "cc"
13864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
13964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
14064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
1418ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
14264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                               ptrdiff_t src_stride,
14364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                               uint8* dst_ptr, int dst_width) {
14464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
145c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vmov.u8    d24, #3                        \n"
146c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "add        %3, %0                         \n"
147c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
14831d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
1492c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0
1502c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n" // src line 1
1518ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %2, %2, #24                  \n"
15264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
15364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // filter src line 0 with src line 1
15464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // expand chars to shorts to allow for room
15564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // when adding lines together
15664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q8, d4                       \n"
15764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q9, d5                       \n"
15864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q10, d6                      \n"
15964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q11, d7                      \n"
16064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
16164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // 3 * line_0 + line_1
16264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q8, d0, d24                  \n"
16364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q9, d1, d24                  \n"
16464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q10, d2, d24                 \n"
16564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q11, d3, d24                 \n"
16664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
16764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // (3 * line_0 + line_1) >> 2
16864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d0, q8, #2                   \n"
16964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d1, q9, #2                   \n"
17064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d2, q10, #2                  \n"
17164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d3, q11, #2                  \n"
17264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
17364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // a0 = (src[0] * 3 + s[1] * 1) >> 2
17464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q8, d1                       \n"
17564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q8, d0, d24                  \n"
17664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d0, q8, #2                   \n"
17764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
17864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // a1 = (src[1] * 1 + s[2] * 1) >> 1
17964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrhadd.u8    d1, d1, d2                   \n"
18064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
18164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // a2 = (src[2] * 1 + s[3] * 3) >> 2
18264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q8, d2                       \n"
18364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q8, d3, d24                  \n"
18464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d2, q8, #2                   \n"
18564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
1862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst3.8       {d0, d1, d2}, [%1]!          \n"
18764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
18864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt          1b                           \n"
1898e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
1908e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
1918e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width),        // %2
1928e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_stride)        // %3
1938e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
1948e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"
19564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
19664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
19764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
1988ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
19964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                               ptrdiff_t src_stride,
20064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                               uint8* dst_ptr, int dst_width) {
20164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
202c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vmov.u8    d24, #3                        \n"
203c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "add        %3, %0                         \n"
204c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
20531d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
2062c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n" // src line 0
2072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n" // src line 1
2088ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %2, %2, #24                  \n"
20964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // average src line 0 with src line 1
21064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrhadd.u8    q0, q0, q2                   \n"
21164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrhadd.u8    q1, q1, q3                   \n"
21264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
21364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // a0 = (src[0] * 3 + s[1] * 1) >> 2
21464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q3, d1                       \n"
21564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q3, d0, d24                  \n"
21664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d0, q3, #2                   \n"
21764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
21864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // a1 = (src[1] * 1 + s[2] * 1) >> 1
21964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrhadd.u8    d1, d1, d2                   \n"
22064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
22164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // a2 = (src[2] * 1 + s[3] * 3) >> 2
22264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q3, d2                       \n"
22364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q3, d3, d24                  \n"
22464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d2, q3, #2                   \n"
22564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
2262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst3.8       {d0, d1, d2}, [%1]!          \n"
22764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt          1b                           \n"
2288e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
2298e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
2308e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width),        // %2
2318e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_stride)        // %3
2328e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
2338e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"
23464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
23564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
23664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
23764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#define HAS_SCALEROWDOWN38_NEON
238f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic uvec8 kShuf38 =
23964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
240f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic uvec8 kShuf38_2 =
24164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 };
242f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec16 kMult38_Div6 =
24364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12,
24464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 };
245f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec16 kMult38_Div9 =
24664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18,
24764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
24864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
24964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32 -> 12
25064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleRowDown38_NEON(const uint8* src_ptr,
2511f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com                         ptrdiff_t src_stride,
25264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                         uint8* dst_ptr, int dst_width) {
25364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
254c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {q3}, [%3]                     \n"
255c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
25631d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
257c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d0, d1, d2, d3}, [%0]!        \n"
258c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "subs       %2, %2, #12                    \n"
259c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vtbl.u8    d4, {d0, d1, d2, d3}, d6       \n"
260c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vtbl.u8    d5, {d0, d1, d2, d3}, d7       \n"
261c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vst1.8     {d4}, [%1]!                    \n"
262c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vst1.32    {d5[0]}, [%1]!                 \n"
263c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "bgt        1b                             \n"
2648e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
2658e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
2668e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width)         // %2
2678e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r"(&kShuf38)           // %3
2688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"
26964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
27064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
27164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
27264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32x3 -> 12x1
2738ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
27464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                                      ptrdiff_t src_stride,
27564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                                      uint8* dst_ptr, int dst_width) {
27664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
277c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.16    {q13}, [%4]                    \n"
278c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {q14}, [%5]                    \n"
279c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {q15}, [%6]                    \n"
280c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "add        r4, %0, %3, lsl #1             \n"
281c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "add        %3, %0                         \n"
282c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
28331d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
28464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
28564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d0 = 00 40 01 41 02 42 03 43
28664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d1 = 10 50 11 51 12 52 13 53
28764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = 20 60 21 61 22 62 23 63
28864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = 30 70 31 71 32 72 33 73
2892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n"
2902c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n"
2912c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d16, d17, d18, d19}, [r4]!  \n"
2928ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %2, %2, #12                  \n"
29364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
29464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Shuffle the input data around to get align the data
29564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
29664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d0 = 00 10 01 11 02 12 03 13
29764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d1 = 40 50 41 51 42 52 43 53
29864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d0, d1                       \n"
29964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d4, d5                       \n"
30064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d16, d17                     \n"
30164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
30264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = 20 30 21 31 22 32 23 33
30364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = 60 70 61 71 62 72 63 73
30464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d2, d3                       \n"
30564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d6, d7                       \n"
30664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d18, d19                     \n"
30764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
30864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d0 = 00+10 01+11 02+12 03+13
30964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = 40+50 41+51 42+52 43+53
31064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    q0, q0                       \n"
31164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    q2, q2                       \n"
31264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    q8, q8                       \n"
31364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
31464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = 60+70 61+71 62+72 63+73
31564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    d3, d3                       \n"
31664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    d7, d7                       \n"
31764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    d19, d19                     \n"
31864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
31964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // combine source lines
32064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q0, q2                       \n"
32164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q0, q8                       \n"
32264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     d4, d3, d7                   \n"
32364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     d4, d19                      \n"
32464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
32564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0]
32664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //             + s[6 + st * 1] + s[7 + st * 1]
32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //             + s[6 + st * 2] + s[7 + st * 2]) / 6
32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrdmulh.s16 q2, q2, q13                  \n"
32964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovn.u16    d4, q2                       \n"
33064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
33164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Shuffle 2,3 reg around so that 2 can be added to the
33264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  0,1 reg and 3 can be added to the 4,5 reg. This
33364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  requires expanding from u8 to u16 as the 0,1 and 4,5
33464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  registers are already expanded. Then do transposes
33564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  to get aligned.
33664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q1, d2                       \n"
33864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q3, d6                       \n"
33964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q9, d18                      \n"
34064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
34164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // combine source lines
34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q1, q3                       \n"
34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q1, q9                       \n"
34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
34564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d4 = xx 20 xx 30 xx 22 xx 32
34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d5 = xx 21 xx 31 xx 23 xx 33
34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u32     d2, d3                       \n"
34864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
34964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d4 = xx 20 xx 21 xx 22 xx 23
35064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d5 = xx 30 xx 31 xx 32 xx 33
35164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u16     d2, d3                       \n"
35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
35364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // 0+1+2, 3+4+5
35464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q0, q1                       \n"
35564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
35664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Need to divide, but can't downshift as the the value
35764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  isn't a power of 2. So multiply by 65536 / n
35864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  and take the upper 16 bits.
35964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrdmulh.s16 q0, q0, q15                  \n"
36064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
36164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Align for table lookup, vtbl requires registers to
36264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  be adjacent
36364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u8      d2, d4                       \n"
36464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
36564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtbl.u8      d3, {d0, d1, d2}, d28        \n"
36664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtbl.u8      d4, {d0, d1, d2}, d29        \n"
36764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
3682c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {d3}, [%1]!                  \n"
3692c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.32      {d4[0]}, [%1]!               \n"
37064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt          1b                           \n"
3718e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),          // %0
3728e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),          // %1
3738e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width),        // %2
3748e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_stride)        // %3
3758e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r"(&kMult38_Div6),     // %4
3768e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "r"(&kShuf38_2),        // %5
3778e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "r"(&kMult38_Div9)      // %6
3788e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r4", "q0", "q1", "q2", "q3", "q8", "q9",
3798e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "q13", "q14", "q15", "memory", "cc"
38064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
38164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
38264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
38364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32x2 -> 12x1
// Scales two source rows down to one destination row at 3/8 width with a box
// filter.  Each loop iteration reads 32 bytes from src_ptr and 32 bytes from
// src_ptr + src_stride and writes 12 bytes to dst_ptr.
//   src_ptr    - first source row.
//   src_stride - byte offset from src_ptr to the second source row.
//   dst_ptr    - destination row.
//   dst_width  - output byte count; reduced by 12 per iteration.  The count
//                is tested after the loop body, so output is written in whole
//                groups of 12 bytes and at least one group is always produced.
// Per the lane comments below, within every group of 8 source columns the
// columns 0-2 and 3-5 are summed over both rows (6 samples, scaled by the
// kMult38_Div6 table) and columns 6-7 are summed over both rows (4 samples,
// rounded shift right by 2).  kMult38_Div6 and kShuf38_2 are tables defined
// outside this function.
3848ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
38564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                               ptrdiff_t src_stride,
38664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                               uint8* dst_ptr, int dst_width) {
38764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
388c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.16    {q13}, [%4]                    \n"  // q13 = kMult38_Div6
389c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {q14}, [%5]                    \n"  // d28:d29 = kShuf38_2
390c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "add        %3, %0                         \n"  // %3 = second row ptr
391c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
39231d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
39364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
39464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d0 = 00 40 01 41 02 42 03 43
39564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d1 = 10 50 11 51 12 52 13 53
39664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = 20 60 21 61 22 62 23 63
39764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = 30 70 31 71 32 72 33 73
3982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d0, d1, d2, d3}, [%0]!      \n"
3992c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld4.8       {d4, d5, d6, d7}, [%3]!      \n"
4008ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %2, %2, #12                  \n"  // 12 outputs per loop
40164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
40264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Shuffle the input data around to align the data
40364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
40464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d0 = 00 10 01 11 02 12 03 13
40564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d1 = 40 50 41 51 42 52 43 53
40664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d0, d1                       \n"
40764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d4, d5                       \n"
40864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
40964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = 20 30 21 31 22 32 23 33
41064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = 60 70 61 71 62 72 63 73
41164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d2, d3                       \n"
41264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u8      d6, d7                       \n"
41364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
41464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d0 = 00+10 01+11 02+12 03+13
41564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d1 = 40+50 41+51 42+52 43+53
41664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    q0, q0                       \n"
41764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    q2, q2                       \n"
41864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
41964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = 60+70 61+71 62+72 63+73
42064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    d3, d3                       \n"
42164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vpaddl.u8    d7, d7                       \n"
42264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
42364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // combine source lines
42464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q0, q2                       \n"
42564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     d4, d3, d7                   \n"
42664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
42764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
    // Only the d4 half of q2 holds the 6,7 sums; the narrowed bytes that come
    // from d5 are not selected for output unless kShuf38_2 indexes them.
42864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrshrn.u16  d4, q2, #2                   \n"
42964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
43064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Shuffle 2,3 reg around so that 2 can be added to the
43164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  0,1 reg and 3 can be added to the 4,5 reg. This
43264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  requires expanding from u8 to u16 as the 0,1 and 4,5
43364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  registers are already expanded. Then do transposes
43464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  to get aligned.
43564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // q1 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
    // (q1 aliases d2:d3; the d3 sums were already folded into d4 above, and
    //  q3 aliases d6:d7 in the same way for the second row.)
43664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q1, d2                       \n"
43764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmovl.u8     q3, d6                       \n"
43864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
43964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // combine source lines
44064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q1, q3                       \n"
44164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
44264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = xx 20 xx 30 xx 22 xx 32
44364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = xx 21 xx 31 xx 23 xx 33
44464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u32     d2, d3                       \n"
44564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
44664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d2 = xx 20 xx 21 xx 22 xx 23
44764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // d3 = xx 30 xx 31 xx 32 xx 33
44864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtrn.u16     d2, d3                       \n"
44964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
45064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // 0+1+2, 3+4+5
45164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vadd.u16     q0, q1                       \n"
45264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
45364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Need to divide, but can't downshift as the value
45464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  isn't a power of 2. So multiply by 65536 / n
45564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  and take the upper 16 bits.
    // NOTE(review): vqrdmulh doubles the product before taking the high half;
    //  presumably kMult38_Div6 is scaled to account for that - confirm
    //  against the table definition.
45664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vqrdmulh.s16 q0, q0, q13                  \n"
45764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
45864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    // Align for table lookup, vtbl requires registers to
45964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    //  be adjacent
46064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmov.u8      d2, d4                       \n"
46164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
    // Pick the output bytes out of {d0, d1, d2} using the kShuf38_2 indices.
46264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtbl.u8      d3, {d0, d1, d2}, d28        \n"
46364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vtbl.u8      d4, {d0, d1, d2}, d29        \n"
46464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
    // Store 8 + 4 = 12 output bytes.
4652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {d3}, [%1]!                  \n"
4662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.32      {d4[0]}, [%1]!               \n"
46764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt          1b                           \n"
4688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(src_ptr),       // %0
4698e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_ptr),       // %1
4708e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width),     // %2
4718e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_stride)     // %3
4728e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "r"(&kMult38_Div6),  // %4
4738e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "r"(&kShuf38_2)      // %5
4748e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
47564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
47664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
47764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
47864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 16x2 -> 16x1
// Blends the row at src_ptr with the row at src_ptr + src_stride into
// dst_ptr, 16 bytes per loop iteration.
//   source_y_fraction - weight of the second row in 1/256 units:
//     0           copies the first row (the second row is never read).
//     64/128/192  use vrhadd-based fast paths (75/25, 50/50, 25/75).
//     otherwise   dst = (row0 * (256 - f) + row1 * f + 128) >> 8.
//   dst_width - byte count; reduced by 16 per iteration and tested after the
//     loop body, so output is written in whole groups of 16 bytes and at
//     least one group is always produced.
// After every path, one extra byte (a copy of the last byte produced) is
// stored immediately after the output, so dst_ptr needs room for that byte.
47964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleFilterRows_NEON(uint8* dst_ptr,
48064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                          const uint8* src_ptr, ptrdiff_t src_stride,
48164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com                          int dst_width, int source_y_fraction) {
48264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  asm volatile (
    // Dispatch on source_y_fraction; %2 becomes the second row pointer.
48364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "cmp          %4, #0                       \n"
48466d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "beq          100f                         \n"
48564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "add          %2, %1                       \n"
48666d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "cmp          %4, #64                      \n"
48766d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "beq          75f                          \n"
48864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "cmp          %4, #128                     \n"
48966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "beq          50f                          \n"
49066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "cmp          %4, #192                     \n"
49166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "beq          25f                          \n"
49264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
    // d5 = f (second row weight), d4 = 256 - f (first row weight).
    // vdup.8 replicates only the low byte of the register.
    // NOTE(review): this presumably relies on 0 < f < 256 - confirm callers.
49364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vdup.8       d5, %4                       \n"
49464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "rsb          %4, #256                     \n"
49564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vdup.8       d4, %4                       \n"
49666d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    // General purpose row blend.
49731d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com  "1:                                          \n"
4982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q0}, [%1]!                  \n"
4992c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q1}, [%2]!                  \n"
5008ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %3, %3, #16                  \n"
50164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmull.u8     q13, d0, d4                  \n"
50264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmull.u8     q14, d1, d4                  \n"
50364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q13, d2, d5                  \n"
50464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vmlal.u8     q14, d3, d5                  \n"
    // Rounding narrow: (sum + 128) >> 8.
50564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrshrn.u16   d0, q13, #8                  \n"
50664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrshrn.u16   d1, q14, #8                  \n"
5072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {q0}, [%0]!                  \n"
50864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "bgt          1b                           \n"
50966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "b            99f                          \n"
51064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
51166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    // Blend 25 / 75.
    // q0 = first row, q1 = second row; averaging with q1 twice weights the
    // second row by roughly 3/4 (each vrhadd rounds).
51266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com  "25:                                         \n"
5132c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q0}, [%1]!                  \n"
5142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q1}, [%2]!                  \n"
5158ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %3, %3, #16                  \n"
51666d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "vrhadd.u8    q0, q1                       \n"
51766d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "vrhadd.u8    q0, q1                       \n"
5182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {q0}, [%0]!                  \n"
51966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "bgt          25b                          \n"
52066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "b            99f                          \n"
52164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
52266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    // Blend 50 / 50.
52366d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com  "50:                                         \n"
5242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q0}, [%1]!                  \n"
5252c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q1}, [%2]!                  \n"
5268ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %3, %3, #16                  \n"
52764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com    "vrhadd.u8    q0, q1                       \n"
5282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {q0}, [%0]!                  \n"
52966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "bgt          50b                          \n"
53066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "b            99f                          \n"
53166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com
53266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    // Blend 75 / 25.
    // Same as the 25 / 75 path but with the rows loaded into swapped
    // registers (q1 = first row, q0 = second row), so the first row gets the
    // larger weight.
53366d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com  "75:                                         \n"
5342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q1}, [%1]!                  \n"
5352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q0}, [%2]!                  \n"
5368ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %3, %3, #16                  \n"
53766d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "vrhadd.u8    q0, q1                       \n"
53866d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "vrhadd.u8    q0, q1                       \n"
5392c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {q0}, [%0]!                  \n"
54066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "bgt          75b                          \n"
54166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "b            99f                          \n"
54266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com
54366d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    // Blend 100 / 0 - Copy row unchanged.
    // Reached before %2 is adjusted; only the first row (%1) is read.
54466d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com  "100:                                        \n"
5452c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vld1.8       {q0}, [%1]!                  \n"
5468ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com    "subs         %3, %3, #16                  \n"
5472c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {q0}, [%0]!                  \n"
54866d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com    "bgt          100b                         \n"
54966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com
    // Common exit: duplicate the last output byte (top lane of q0) into the
    // byte just past the last 16-byte store.
55066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com  "99:                                         \n"
5512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com    "vst1.8       {d1[7]}, [%0]                \n"
5528e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "+r"(dst_ptr),          // %0
5538e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_ptr),          // %1
5548e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(src_stride),       // %2
5558e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(dst_width),        // %3
5568e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com    "+r"(source_y_fraction) // %4
5578e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  :
5588e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com  : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
55964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com  );
56064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}
561dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com
// Point-samples a row of 32-bit pixels to half width by keeping the
// odd-indexed pixel of every pair.  Each loop iteration reads 16 source
// pixels and writes 8.  dst_width is reduced by 8 per iteration and tested
// after the loop body, so at least one group of 8 pixels is always written.
// src_stride is not referenced by this function.
5621f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
563dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com                            uint8* dst, int dst_width) {
564dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  asm volatile (
565c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
566dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  "1:                                          \n"
567dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    // load even pixels into q0, odd into q1
    // (second load: even pixels into q2, odd into q3)
568dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld2.32    {q0, q1}, [%0]!                \n"
569dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld2.32    {q2, q3}, [%0]!                \n"
570dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "subs       %2, %2, #8                     \n"  // 8 processed per loop
571dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vst1.8     {q1}, [%1]!                    \n"  // store odd pixels
572dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vst1.8     {q3}, [%1]!                    \n"
573dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "bgt        1b                             \n"
574dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "+r"(src_ptr),          // %0
575dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst),              // %1
576dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst_width)         // %2
577dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  :
578dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "memory", "cc", "q0", "q1", "q2", "q3"  // Clobber List
579dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  );
580dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com}
581dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com
// Box-filters two rows of 4-channel pixels to half width: every output
// channel is the rounded average of the same channel in a 2x2 block of source
// pixels (two adjacent pixels from the row at src_ptr and the two below them
// at src_ptr + src_stride).  Each loop iteration reads 16 pixels from each
// row and writes 8.  dst_width is reduced by 8 per iteration and tested after
// the loop body, so at least one group of 8 pixels is always written.
582dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.comvoid ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
583dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com                               uint8* dst, int dst_width) {
584dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  asm volatile (
585dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    // change the stride to row 2 pointer
586dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "add        %1, %1, %0                     \n"
587c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
588dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  "1:                                          \n"
    // The two de-interleaving loads fill q0..q3 with one channel each for 16
    // consecutive first-row pixels.
589dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels.
590dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld4.8     {d1, d3, d5, d7}, [%0]!        \n"  // load next 8 ARGB pixels.
591dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "subs       %3, %3, #8                     \n"  // 8 processed per loop.
    // Sum horizontally adjacent pixels of the first row into 16-bit lanes.
592dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpaddl.u8  q0, q0                         \n"  // B 16 bytes -> 8 shorts.
593dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpaddl.u8  q1, q1                         \n"  // G 16 bytes -> 8 shorts.
594dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpaddl.u8  q2, q2                         \n"  // R 16 bytes -> 8 shorts.
595dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpaddl.u8  q3, q3                         \n"  // A 16 bytes -> 8 shorts.
596dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld4.8     {d16, d18, d20, d22}, [%1]!    \n"  // load 8 more ARGB pixels.
597dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld4.8     {d17, d19, d21, d23}, [%1]!    \n"  // load last 8 ARGB pixels.
    // Add the pairwise sums of the second row onto the first-row sums.
598dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpadal.u8  q0, q8                         \n"  // B 16 bytes -> 8 shorts.
599dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpadal.u8  q1, q9                         \n"  // G 16 bytes -> 8 shorts.
600dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpadal.u8  q2, q10                        \n"  // R 16 bytes -> 8 shorts.
601dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vpadal.u8  q3, q11                        \n"  // A 16 bytes -> 8 shorts.
    // (sum of 4 + 2) >> 2, narrowed back to bytes.
602dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vrshrn.u16 d0, q0, #2                     \n"  // downshift, round and pack
603dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vrshrn.u16 d1, q1, #2                     \n"
604dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vrshrn.u16 d2, q2, #2                     \n"
605dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vrshrn.u16 d3, q3, #2                     \n"
    // Re-interleave the four channel registers into 8 output pixels.
606dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vst4.8     {d0, d1, d2, d3}, [%2]!        \n"
607dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "bgt        1b                             \n"
608dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "+r"(src_ptr),          // %0
609dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(src_stride),       // %1
610dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst),              // %2
611dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst_width)         // %3
612dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  :
613dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
614dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  );
615dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com}
616dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com
617dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Reads 4 pixels at a time.
618dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Alignment requirement: src_argb 4 byte aligned.
// Copies every src_stepx-th 32-bit pixel of the source row to dst_argb,
// starting with the pixel at src_argb.  Each loop iteration gathers 4 pixels
// and writes them as one 16-byte store.  dst_width is reduced by 4 per
// iteration and tested after the loop body, so at least one group of 4
// pixels is always written.  src_stride is not referenced by this function.
6191f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleARGBRowDownEven_NEON(const uint8* src_argb,  ptrdiff_t src_stride,
6201f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com                               int src_stepx, uint8* dst_argb, int dst_width) {
621dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  asm volatile (
    // r12 = src_stepx * 4: byte distance between sampled pixels.  r12 is a
    // hard-coded scratch register and is listed in the clobbers below.
622dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "mov        r12, %3, lsl #2                \n"
623c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
624dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  "1:                                          \n"
    // Load one pixel into each 32-bit lane of q0, advancing the source
    // pointer by r12 after every load.
625dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld1.32    {d0[0]}, [%0], r12             \n"
626dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld1.32    {d0[1]}, [%0], r12             \n"
627dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld1.32    {d1[0]}, [%0], r12             \n"
628dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vld1.32    {d1[1]}, [%0], r12             \n"
629dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
630dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vst1.8     {q0}, [%1]!                    \n"
631dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "bgt        1b                             \n"
632dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "+r"(src_argb),    // %0
633dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst_argb),    // %1
634dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst_width)    // %2
635dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "r"(src_stepx)     // %3
636dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "memory", "cc", "r12", "q0"
637dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  );
638dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com}
639dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com
640dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Reads 4 pixels at a time.
641dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Alignment requirement: src_argb 4 byte aligned.
642dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.comvoid ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
643dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com                                  int src_stepx,
644dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com                                  uint8* dst_argb, int dst_width) {
645dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  asm volatile (
646c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "mov        r12, %4, lsl #2                \n"
647c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "add        %1, %1, %0                     \n"
648c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    ".p2align   2                              \n"
649dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  "1:                                          \n"
650c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d0}, [%0], r12                \n"  // Read 4 2x2 blocks -> 2x1
651c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d1}, [%1], r12                \n"
652c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d2}, [%0], r12                \n"
653c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d3}, [%1], r12                \n"
654c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d4}, [%0], r12                \n"
655c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d5}, [%1], r12                \n"
656c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d6}, [%0], r12                \n"
657c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vld1.8     {d7}, [%1], r12                \n"
658c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vaddl.u8   q0, d0, d1                     \n"
659c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vaddl.u8   q1, d2, d3                     \n"
660c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vaddl.u8   q2, d4, d5                     \n"
661c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vaddl.u8   q3, d6, d7                     \n"
662c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vswp.8     d1, d2                         \n"  // ab_cd -> ac_bd
663c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vswp.8     d5, d6                         \n"  // ef_gh -> eg_fh
664c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vadd.u16   q0, q0, q1                     \n"  // (a+b)_(c+d)
665c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com    "vadd.u16   q2, q2, q3                     \n"  // (e+f)_(g+h)
666dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vrshrn.u16 d0, q0, #2                     \n"  // first 2 pixels.
667dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vrshrn.u16 d1, q2, #2                     \n"  // next 2 pixels.
668dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "subs       %3, %3, #4                     \n"  // 4 pixels per loop.
669dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "vst1.8     {q0}, [%2]!                    \n"
670dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "bgt        1b                             \n"
671dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "+r"(src_argb),    // %0
672dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(src_stride),  // %1
673dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst_argb),    // %2
674dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com    "+r"(dst_width)    // %3
675dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "r"(src_stepx)     // %4
676dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
677dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com  );
678dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com}
679dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com
68064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif  // __ARM_NEON__
68164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com
68264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#ifdef __cplusplus
68364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}  // extern "C"
68464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com}  // namespace libyuv
68564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif
686