164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com/* 264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * Copyright 2011 The LibYuv Project Authors. All rights reserved. 364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * 464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * Use of this source code is governed by a BSD-style license 564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * that can be found in the LICENSE file in the root of the source 664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * tree. An additional intellectual property rights grant can be found 7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com * in the file PATENTS. All contributing project authors may 864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * be found in the AUTHORS file in the root of the source tree. 964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com */ 1064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#include "libyuv/row.h" 1264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#ifdef __cplusplus 1464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comnamespace libyuv { 1564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comextern "C" { 1664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif 1764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 18dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// This module is for GCC Neon. 1937ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ 2037ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com !defined(__native_client__) 2164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2283a63e65a6bcbf59f7dc617d0c7275d50625884cfbarchard@google.com// NEON downscalers with interpolation. 2383a63e65a6bcbf59f7dc617d0c7275d50625884cfbarchard@google.com// Provided by Fritz Koenig 2464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// Read 32x1 throw away even pixels, and write 16x1. 261f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 2764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst, int dst_width) { 2864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 29c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 3031d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 3164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // load even pixels into q0, odd into q1 322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" 3364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "subs %2, %2, #16 \n" // 16 processed per loop 342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%1]! \n" // store odd pixels 3564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 368e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 378e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst), // %1 388e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 398e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 408e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1" // Clobber List 4164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 4264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 4364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 4406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// Read 32x2 average down and write 16x1. 458ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 4664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst, int dst_width) { 4764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 4864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // change the stride to row 2 pointer 4964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "add %1, %0 \n" 50c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 5131d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 522c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc 532c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc 548e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 5564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" // row 1 add adjacent 5664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q1, q1 \n" 5764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1 5864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q1, q3 \n" 5964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack 6064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u16 d1, q1, #2 \n" 612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%2]! \n" 6264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 638e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 648e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride), // %1 658e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst), // %2 668e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %3 678e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "q2", "q3" // Clobber List 698e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com ); 7064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 7164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 721f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 7364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 7464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 75c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 7631d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 77c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 78c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 79c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst1.8 {d2}, [%1]! \n" 80c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "bgt 1b \n" 818e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 828e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 838e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 848e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 858e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "memory", "cc" 8664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 8764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 8864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 898ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 9064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 9164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 9264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "add r4, %0, %3 \n" 9364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "add r5, r4, %3 \n" 9464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "add %3, r5, %3 \n" 95c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 9631d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 972c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load up 16x4 982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [r4]! \n" 992c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q2}, [r5]! \n" 1002c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q3}, [%3]! \n" 1018ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #4 \n" 10264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" 10364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q1 \n" 10464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q2 \n" 10564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q3 \n" 10664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u16 q0, q0 \n" 10764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding 10864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovn.u16 d0, q0 \n" 1092c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.32 {d0[0]}, [%1]! \n" 11064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 1118e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 1128e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 1138e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 1148e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r"(src_stride) // %3 1158e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc" 11664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 11764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 11864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 11964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// Down scale from 4 to 3 pixels. Use the neon multilane read/write 12064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// to load up the every 4th pixel into a 4 different registers. 12164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// Point samples 32 pixels to 24 pixels. 12264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleRowDown34_NEON(const uint8* src_ptr, 1231f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com ptrdiff_t src_stride, 12464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 12564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 126c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 12731d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 128c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 129c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "subs %2, %2, #24 \n" 130c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vmov d2, d3 \n" // order d0, d1, d2 131c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst3.8 {d0, d1, d2}, [%1]! \n" 132c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "bgt 1b \n" 1338e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 1348e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 1358e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 1368e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 1378e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "d0", "d1", "d2", "d3", "memory", "cc" 13864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 13964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 14064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1418ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, 14264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 14364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 14464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 145c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vmov.u8 d24, #3 \n" 146c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 147c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 14831d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 1492c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 1502c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1 1518ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #24 \n" 15264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 15364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // filter src line 0 with src line 1 15464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // expand chars to shorts to allow for room 15564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // when adding lines together 15664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q8, d4 \n" 15764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q9, d5 \n" 15864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q10, d6 \n" 15964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q11, d7 \n" 16064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 16164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 3 * line_0 + line_1 16264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q8, d0, d24 \n" 16364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q9, d1, d24 \n" 16464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q10, d2, d24 \n" 16564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q11, d3, d24 \n" 16664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 16764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // (3 * line_0 + line_1) >> 2 16864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d0, q8, #2 \n" 16964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d1, q9, #2 \n" 17064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d2, q10, #2 \n" 17164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d3, q11, #2 \n" 17264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 17364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a0 = (src[0] * 3 + s[1] * 1) >> 2 17464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q8, d1 \n" 17564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q8, d0, d24 \n" 17664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d0, q8, #2 \n" 17764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 17864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a1 = (src[1] * 1 + s[2] * 1) >> 1 17964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 d1, d1, d2 \n" 18064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 18164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a2 = (src[2] * 1 + s[3] * 3) >> 2 18264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q8, d2 \n" 18364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q8, d3, d24 \n" 18464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d2, q8, #2 \n" 18564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst3.8 {d0, d1, d2}, [%1]! \n" 18764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 18864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 1898e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 1908e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 1918e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %2 1928e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride) // %3 1938e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 1948e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc" 19564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 19664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 19764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1988ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, 19964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 20064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 20164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 202c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vmov.u8 d24, #3 \n" 203c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 204c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 20531d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 2062c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 2072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1 2088ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #24 \n" 20964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // average src line 0 with src line 1 21064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 q0, q0, q2 \n" 21164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 q1, q1, q3 \n" 21264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 21364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a0 = (src[0] * 3 + s[1] * 1) >> 2 21464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d1 \n" 21564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q3, d0, d24 \n" 21664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d0, q3, #2 \n" 21764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 21864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a1 = (src[1] * 1 + s[2] * 1) >> 1 21964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 d1, d1, d2 \n" 22064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 22164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a2 = (src[2] * 1 + s[3] * 3) >> 2 22264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d2 \n" 22364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q3, d3, d24 \n" 22464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d2, q3, #2 \n" 22564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst3.8 {d0, d1, d2}, [%1]! \n" 22764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 2288e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 2298e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 2308e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %2 2318e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride) // %3 2328e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 2338e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc" 23464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 23564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 23664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 23764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#define HAS_SCALEROWDOWN38_NEON 238f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic uvec8 kShuf38 = 23964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; 240f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic uvec8 kShuf38_2 = 24164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 }; 242f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec16 kMult38_Div6 = 24364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 24464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; 245f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec16 kMult38_Div9 = 24664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 24764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; 24864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 24964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32 -> 12 25064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleRowDown38_NEON(const uint8* src_ptr, 2511f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com ptrdiff_t src_stride, 25264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 25364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 254c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {q3}, [%3] \n" 255c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 25631d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 257c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d0, d1, d2, d3}, [%0]! \n" 258c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "subs %2, %2, #12 \n" 259c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" 260c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" 261c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst1.8 {d4}, [%1]! \n" 262c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst1.32 {d5[0]}, [%1]! \n" 263c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "bgt 1b \n" 2648e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 2658e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 2668e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 2678e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r"(&kShuf38) // %3 2688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc" 26964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 27064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 27164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 27264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32x3 -> 12x1 2738ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, 27464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 27564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 27664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 277c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.16 {q13}, [%4] \n" 278c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {q14}, [%5] \n" 279c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {q15}, [%6] \n" 280c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add r4, %0, %3, lsl #1 \n" 281c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 282c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 28331d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 28464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 28564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00 40 01 41 02 42 03 43 28664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d1 = 10 50 11 51 12 52 13 53 28764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 20 60 21 61 22 62 23 63 28864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 30 70 31 71 32 72 33 73 2892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" 2902c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! \n" 2912c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d16, d17, d18, d19}, [r4]! \n" 2928ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #12 \n" 29364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 29464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Shuffle the input data around to get align the data 29564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 29664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00 10 01 11 02 12 03 13 29764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d1 = 40 50 41 51 42 52 43 53 29864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d0, d1 \n" 29964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d4, d5 \n" 30064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d16, d17 \n" 30164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 30264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 20 30 21 31 22 32 23 33 30364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 60 70 61 71 62 72 63 73 30464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d2, d3 \n" 30564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d6, d7 \n" 30664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d18, d19 \n" 30764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 30864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00+10 01+11 02+12 03+13 30964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 40+50 41+51 42+52 43+53 31064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" 31164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q2, q2 \n" 31264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q8, q8 \n" 31364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 31464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 60+70 61+71 62+72 63+73 31564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d3, d3 \n" 31664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d7, d7 \n" 31764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d19, d19 \n" 31864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 31964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // combine source lines 32064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q2 \n" 32164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q8 \n" 32264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 d4, d3, d7 \n" 32364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 d4, d19 \n" 32464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 32564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] 32664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // + s[6 + st * 1] + s[7 + st * 1] 32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // + s[6 + st * 2] + s[7 + st * 2]) / 6 32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrdmulh.s16 q2, q2, q13 \n" 32964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovn.u16 d4, q2 \n" 33064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 33164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Shuffle 2,3 reg around so that 2 can be added to the 33264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 0,1 reg and 3 can be added to the 4,5 reg. This 33364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // requires expanding from u8 to u16 as the 0,1 and 4,5 33464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // registers are already expanded. Then do transposes 33564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // to get aligned. 33664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q1, d2 \n" 33864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d6 \n" 33964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q9, d18 \n" 34064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 34164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // combine source lines 34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q1, q3 \n" 34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q1, q9 \n" 34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 34564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d4 = xx 20 xx 30 xx 22 xx 32 34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d5 = xx 21 xx 31 xx 23 xx 33 34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u32 d2, d3 \n" 34864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 34964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d4 = xx 20 xx 21 xx 22 xx 23 35064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d5 = xx 30 xx 31 xx 32 xx 33 35164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u16 d2, d3 \n" 35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 35364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 0+1+2, 3+4+5 35464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q1 \n" 35564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 35664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Need to divide, but can't downshift as the the value 35764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // isn't a power of 2. So multiply by 65536 / n 35864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // and take the upper 16 bits. 35964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrdmulh.s16 q0, q0, q15 \n" 36064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 36164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Align for table lookup, vtbl requires registers to 36264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // be adjacent 36364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u8 d2, d4 \n" 36464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 36564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtbl.u8 d3, {d0, d1, d2}, d28 \n" 36664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtbl.u8 d4, {d0, d1, d2}, d29 \n" 36764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 3682c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%1]! \n" 3692c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.32 {d4[0]}, [%1]! \n" 37064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 3718e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 3728e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 3738e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %2 3748e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride) // %3 3758e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r"(&kMult38_Div6), // %4 3768e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "r"(&kShuf38_2), // %5 3778e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "r"(&kMult38_Div9) // %6 3788e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r4", "q0", "q1", "q2", "q3", "q8", "q9", 3798e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "q13", "q14", "q15", "memory", "cc" 38064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 38164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 38264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 38364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32x2 -> 12x1 3848ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, 38564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 38664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 38764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 388c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.16 {q13}, [%4] \n" 389c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {q14}, [%5] \n" 390c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 391c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 39231d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 39364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 39464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00 40 01 41 02 42 03 43 39564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d1 = 10 50 11 51 12 52 13 53 39664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 20 60 21 61 22 62 23 63 39764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 30 70 31 71 32 72 33 73 3982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" 3992c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! \n" 4008ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #12 \n" 40164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 40264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Shuffle the input data around to get align the data 40364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 40464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00 10 01 11 02 12 03 13 40564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d1 = 40 50 41 51 42 52 43 53 40664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d0, d1 \n" 40764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d4, d5 \n" 40864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 40964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 20 30 21 31 22 32 23 33 41064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 60 70 61 71 62 72 63 73 41164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d2, d3 \n" 41264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d6, d7 \n" 41364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 41464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00+10 01+11 02+12 03+13 41564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 40+50 41+51 42+52 43+53 41664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" 41764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q2, q2 \n" 41864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 41964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 60+70 61+71 62+72 63+73 42064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d3, d3 \n" 42164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d7, d7 \n" 42264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 42364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // combine source lines 42464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q2 \n" 42564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 d4, d3, d7 \n" 42664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 42764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4 42864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d4, q2, #2 \n" 42964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 43064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Shuffle 2,3 reg around so that 2 can be added to the 43164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 0,1 reg and 3 can be added to the 4,5 reg. This 43264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // requires expanding from u8 to u16 as the 0,1 and 4,5 43364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // registers are already expanded. Then do transposes 43464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // to get aligned. 43564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 43664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q1, d2 \n" 43764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d6 \n" 43864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 43964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // combine source lines 44064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q1, q3 \n" 44164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 44264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d4 = xx 20 xx 30 xx 22 xx 32 44364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d5 = xx 21 xx 31 xx 23 xx 33 44464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u32 d2, d3 \n" 44564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 44664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d4 = xx 20 xx 21 xx 22 xx 23 44764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d5 = xx 30 xx 31 xx 32 xx 33 44864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u16 d2, d3 \n" 44964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 45064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 0+1+2, 3+4+5 45164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q1 \n" 45264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 45364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Need to divide, but can't downshift as the the value 45464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // isn't a power of 2. So multiply by 65536 / n 45564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // and take the upper 16 bits. 45664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrdmulh.s16 q0, q0, q13 \n" 45764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 45864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Align for table lookup, vtbl requires registers to 45964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // be adjacent 46064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u8 d2, d4 \n" 46164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 46264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtbl.u8 d3, {d0, d1, d2}, d28 \n" 46364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtbl.u8 d4, {d0, d1, d2}, d29 \n" 46464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 4652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%1]! \n" 4662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.32 {d4[0]}, [%1]! \n" 46764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 4688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 4698e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 4708e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %2 4718e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride) // %3 4728e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r"(&kMult38_Div6), // %4 4738e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "r"(&kShuf38_2) // %5 4748e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc" 47564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 47664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 47764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 47864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 16x2 -> 16x1 47964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleFilterRows_NEON(uint8* dst_ptr, 48064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com const uint8* src_ptr, ptrdiff_t src_stride, 48164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com int dst_width, int source_y_fraction) { 48264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 48364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "cmp %4, #0 \n" 48466d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "beq 100f \n" 48564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "add %2, %1 \n" 48666d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "cmp %4, #64 \n" 48766d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "beq 75f \n" 48864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "cmp %4, #128 \n" 48966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "beq 50f \n" 49066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "cmp %4, #192 \n" 49166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "beq 25f \n" 49264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 49364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vdup.8 d5, %4 \n" 49464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "rsb %4, #256 \n" 49564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vdup.8 d4, %4 \n" 49666d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com // General purpose row blend. 49731d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 4982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 4992c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 5008ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %3, %3, #16 \n" 50164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmull.u8 q13, d0, d4 \n" 50264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmull.u8 q14, d1, d4 \n" 50364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q13, d2, d5 \n" 50464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q14, d3, d5 \n" 50564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u16 d0, q13, #8 \n" 50664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u16 d1, q14, #8 \n" 5072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 50864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 50966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "b 99f \n" 51064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 51166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com // Blend 25 / 75. 51266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "25: \n" 5132c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 5142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 5158ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %3, %3, #16 \n" 51666d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "vrhadd.u8 q0, q1 \n" 51766d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "vrhadd.u8 q0, q1 \n" 5182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 51966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "bgt 25b \n" 52066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "b 99f \n" 52164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 52266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com // Blend 50 / 50. 52366d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "50: \n" 5242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 5252c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 5268ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %3, %3, #16 \n" 52764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 q0, q1 \n" 5282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 52966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "bgt 50b \n" 53066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "b 99f \n" 53166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com 53266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com // Blend 75 / 25. 53366d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "75: \n" 5342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! \n" 5352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%2]! \n" 5368ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %3, %3, #16 \n" 53766d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "vrhadd.u8 q0, q1 \n" 53866d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "vrhadd.u8 q0, q1 \n" 5392c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 54066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "bgt 75b \n" 54166d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "b 99f \n" 54266d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com 54366d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com // Blend 100 / 0 - Copy row unchanged. 54466d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "100: \n" 5452c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 5468ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %3, %3, #16 \n" 5472c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 54866d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "bgt 100b \n" 54966d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com 55066d16f4100d7aa8c9bab5d2003a2b177c3ccc798fbarchard@google.com "99: \n" 5512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d1[7]}, [%0] \n" 5528e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(dst_ptr), // %0 5538e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_ptr), // %1 5548e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride), // %2 5558e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %3 5568e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(source_y_fraction) // %4 5578e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 5588e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc" 55964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 56064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 561dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com 5621f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 563dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com uint8* dst, int dst_width) { 564dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com asm volatile ( 565c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 566dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "1: \n" 567dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com // load even pixels into q0, odd into q1 568dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld2.32 {q0, q1}, [%0]! \n" 569dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld2.32 {q2, q3}, [%0]! \n" 570dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 571dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vst1.8 {q1}, [%1]! \n" // store odd pixels 572dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vst1.8 {q3}, [%1]! \n" 573dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "bgt 1b \n" 574dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "+r"(src_ptr), // %0 575dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst), // %1 576dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst_width) // %2 577dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : 578dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List 579dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com ); 580dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com} 581dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com 582dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.comvoid ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 583dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com uint8* dst, int dst_width) { 584dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com asm volatile ( 585dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com // change the stride to row 2 pointer 586dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "add %1, %1, %0 \n" 587c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 588dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "1: \n" 589dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 590dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 591dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 592dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 593dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 594dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 595dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. 596dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels. 597dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels. 598dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. 599dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. 600dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. 601dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. 602dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack 603dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vrshrn.u16 d1, q1, #2 \n" 604dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vrshrn.u16 d2, q2, #2 \n" 605dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vrshrn.u16 d3, q3, #2 \n" 606dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" 607dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "bgt 1b \n" 608dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "+r"(src_ptr), // %0 609dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(src_stride), // %1 610dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst), // %2 611dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst_width) // %3 612dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : 613dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" 614dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com ); 615dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com} 616dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com 617dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Reads 4 pixels at a time. 618dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Alignment requirement: src_argb 4 byte aligned. 6191f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, 6201f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com int src_stepx, uint8* dst_argb, int dst_width) { 621dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com asm volatile ( 622dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "mov r12, %3, lsl #2 \n" 623c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 624dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "1: \n" 625dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld1.32 {d0[0]}, [%0], r12 \n" 626dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld1.32 {d0[1]}, [%0], r12 \n" 627dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld1.32 {d1[0]}, [%0], r12 \n" 628dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vld1.32 {d1[1]}, [%0], r12 \n" 629dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "subs %2, %2, #4 \n" // 4 pixels per loop. 630dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vst1.8 {q0}, [%1]! \n" 631dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "bgt 1b \n" 632dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "+r"(src_argb), // %0 633dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst_argb), // %1 634dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst_width) // %2 635dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "r"(src_stepx) // %3 636dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "memory", "cc", "r12", "q0" 637dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com ); 638dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com} 639dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com 640dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Reads 4 pixels at a time. 641dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// Alignment requirement: src_argb 4 byte aligned. 642dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.comvoid ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, 643dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com int src_stepx, 644dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com uint8* dst_argb, int dst_width) { 645dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com asm volatile ( 646c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "mov r12, %4, lsl #2 \n" 647c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %1, %1, %0 \n" 648c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 649dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "1: \n" 650c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1 651c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d1}, [%1], r12 \n" 652c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d2}, [%0], r12 \n" 653c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d3}, [%1], r12 \n" 654c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d4}, [%0], r12 \n" 655c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d5}, [%1], r12 \n" 656c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d6}, [%0], r12 \n" 657c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d7}, [%1], r12 \n" 658c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vaddl.u8 q0, d0, d1 \n" 659c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vaddl.u8 q1, d2, d3 \n" 660c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vaddl.u8 q2, d4, d5 \n" 661c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vaddl.u8 q3, d6, d7 \n" 662c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vswp.8 d1, d2 \n" // ab_cd -> ac_bd 663c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vswp.8 d5, d6 \n" // ef_gh -> eg_fh 664c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d) 665c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h) 666dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels. 667dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels. 668dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "subs %3, %3, #4 \n" // 4 pixels per loop. 669dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "vst1.8 {q0}, [%2]! \n" 670dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "bgt 1b \n" 671dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "+r"(src_argb), // %0 672dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(src_stride), // %1 673dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst_argb), // %2 674dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com "+r"(dst_width) // %3 675dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "r"(src_stepx) // %4 676dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com : "memory", "cc", "r12", "q0", "q1", "q2", "q3" 677dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com ); 678dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com} 679dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com 68064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif // __ARM_NEON__ 68164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 68264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#ifdef __cplusplus 68364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} // extern "C" 68464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} // namespace libyuv 68564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif 686