164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com/* 264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * Copyright 2011 The LibYuv Project Authors. All rights reserved. 364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * 464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * Use of this source code is governed by a BSD-style license 564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * that can be found in the LICENSE file in the root of the source 664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * tree. An additional intellectual property rights grant can be found 7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com * in the file PATENTS. All contributing project authors may 864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com * be found in the AUTHORS file in the root of the source tree. 964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com */ 1064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#include "libyuv/row.h" 1264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#ifdef __cplusplus 1464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comnamespace libyuv { 1564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comextern "C" { 1664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#endif 1764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 18dbe4814361fb8fcbc462bbe45a2f39360e14a982fbarchard@google.com// This module is for GCC Neon. 194b7a04e86421f484f01da5262cded21431928574fbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) 2064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2183a63e65a6bcbf59f7dc617d0c7275d50625884cfbarchard@google.com// NEON downscalers with interpolation. 
2283a63e65a6bcbf59f7dc617d0c7275d50625884cfbarchard@google.com// Provided by Fritz Koenig 2364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// Read 32x1 throw away even pixels, and write 16x1. 251f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 2664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst, int dst_width) { 2764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 28c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2931d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 3064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // load even pixels into q0, odd into q1 314e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" 3364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "subs %2, %2, #16 \n" // 16 processed per loop 344e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%1]! 
\n" // store odd pixels 3664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 378e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 388e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst), // %1 398e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 408e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 418e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1" // Clobber List 4264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 4364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 4464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 4506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com// Read 32x2 average down and write 16x1. 468ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 4764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst, int dst_width) { 4864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 4964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // change the stride to row 2 pointer 5064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "add %1, %0 \n" 51c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 5231d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 534e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 542c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc 554e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q2, q3}, [%1]! 
\n" // load row 2 and post inc 578e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 5864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" // row 1 add adjacent 5964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q1, q1 \n" 6064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1 6164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q1, q3 \n" 6264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack 6364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u16 d1, q1, #2 \n" 644e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(2) 652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%2]! \n" 6664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 678e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 688e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride), // %1 698e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst), // %2 708e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %3 718e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 728e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "q2", "q3" // Clobber List 738e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com ); 7464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 7564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 761f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.comvoid ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 7764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 7864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 
79c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 8031d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 814e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 82c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 83c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 844e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 85c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst1.8 {d2}, [%1]! \n" 86c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "bgt 1b \n" 878e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 888e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 898e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 908e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 918e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "memory", "cc" 9264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 9364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 9464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 958ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, 9664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 974b7a04e86421f484f01da5262cded21431928574fbarchard@google.com const uint8* src_ptr1 = src_ptr + src_stride; 984b7a04e86421f484f01da5262cded21431928574fbarchard@google.com const uint8* src_ptr2 = src_ptr + src_stride * 2; 994b7a04e86421f484f01da5262cded21431928574fbarchard@google.com const uint8* src_ptr3 = src_ptr + src_stride * 3; 1004b7a04e86421f484f01da5262cded21431928574fbarchard@google.comasm volatile ( 101c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com 
".p2align 2 \n" 10231d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 1034e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 1042c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load up 16x4 1054e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(3) 1064b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld1.8 {q1}, [%3]! \n" 1074b7a04e86421f484f01da5262cded21431928574fbarchard@google.com MEMACCESS(4) 1084b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld1.8 {q2}, [%4]! \n" 1094b7a04e86421f484f01da5262cded21431928574fbarchard@google.com MEMACCESS(5) 1104b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld1.8 {q3}, [%5]! \n" 1118ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #4 \n" 11264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" 11364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q1 \n" 11464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q2 \n" 11564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpadal.u8 q0, q3 \n" 11664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u16 q0, q0 \n" 11764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding 11864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovn.u16 d0, q0 \n" 1194e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 1202c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.32 {d0[0]}, [%1]! 
\n" 12164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 1224b7a04e86421f484f01da5262cded21431928574fbarchard@google.com : "+r"(src_ptr), // %0 1234b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "+r"(dst_ptr), // %1 1244b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "+r"(dst_width), // %2 1254b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "+r"(src_ptr1), // %3 1264b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "+r"(src_ptr2), // %4 1274b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "+r"(src_ptr3) // %5 1284b7a04e86421f484f01da5262cded21431928574fbarchard@google.com : 1294b7a04e86421f484f01da5262cded21431928574fbarchard@google.com : "q0", "q1", "q2", "q3", "memory", "cc" 13064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 13164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 13264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 13364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// Down scale from 4 to 3 pixels. Use the neon multilane read/write 13464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// to load up the every 4th pixel into a 4 different registers. 13564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// Point samples 32 pixels to 24 pixels. 
13664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleRowDown34_NEON(const uint8* src_ptr, 1371f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com ptrdiff_t src_stride, 13864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 13964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 140c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 14131d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 1424e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 143c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 144c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "subs %2, %2, #24 \n" 145c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vmov d2, d3 \n" // order d0, d1, d2 1464e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 147c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst3.8 {d0, d1, d2}, [%1]! 
\n" 148c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "bgt 1b \n" 1498e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 1508e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 1518e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 1528e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 1538e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "d0", "d1", "d2", "d3", "memory", "cc" 15464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 15564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 15664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 1578ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, 15864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 15964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 16064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 161c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vmov.u8 d24, #3 \n" 162c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 163c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 16431d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 1654e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 1662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 1674e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(3) 1682c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" // src line 1 1698ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #24 \n" 17064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 17164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // filter src line 0 with src line 1 17264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // expand chars to shorts to allow for room 17364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // when adding lines together 17464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q8, d4 \n" 17564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q9, d5 \n" 17664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q10, d6 \n" 17764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q11, d7 \n" 17864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 17964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 3 * line_0 + line_1 18064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q8, d0, d24 \n" 18164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q9, d1, d24 \n" 18264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q10, d2, d24 \n" 18364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q11, d3, d24 \n" 18464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 18564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // (3 * line_0 + line_1) >> 2 18664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d0, q8, #2 \n" 18764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d1, q9, #2 \n" 18864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d2, q10, #2 \n" 18964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d3, q11, #2 \n" 19064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 19164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a0 = (src[0] * 
3 + s[1] * 1) >> 2 19264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q8, d1 \n" 19364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q8, d0, d24 \n" 19464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d0, q8, #2 \n" 19564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 19664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a1 = (src[1] * 1 + s[2] * 1) >> 1 19764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 d1, d1, d2 \n" 19864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 19964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a2 = (src[2] * 1 + s[3] * 3) >> 2 20064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q8, d2 \n" 20164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q8, d3, d24 \n" 20264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d2, q8, #2 \n" 20364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2044e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 2052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst3.8 {d0, d1, d2}, [%1]! 
\n" 20664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 20764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 2088e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 2098e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 2108e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %2 2118e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride) // %3 2128e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 2138e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc" 21464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 21564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 21664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2178ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, 21864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 21964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 22064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 221c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vmov.u8 d24, #3 \n" 222c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 223c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 22431d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 2254e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 2262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 2274e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(3) 2282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" // src line 1 2298ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #24 \n" 23064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // average src line 0 with src line 1 23164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 q0, q0, q2 \n" 23264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 q1, q1, q3 \n" 23364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 23464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a0 = (src[0] * 3 + s[1] * 1) >> 2 23564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d1 \n" 23664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q3, d0, d24 \n" 23764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d0, q3, #2 \n" 23864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 23964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a1 = (src[1] * 1 + s[2] * 1) >> 1 24064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vrhadd.u8 d1, d1, d2 \n" 24164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 24264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // a2 = (src[2] * 1 + s[3] * 3) >> 2 24364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d2 \n" 24464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmlal.u8 q3, d3, d24 \n" 24564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrshrn.u16 d2, q3, #2 \n" 24664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 2474e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 2482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst3.8 {d0, d1, d2}, [%1]! 
\n" 24964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 2508e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 2518e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 2528e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width), // %2 2538e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(src_stride) // %3 2548e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : 2558e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc" 25664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 25764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 25864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 25964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com#define HAS_SCALEROWDOWN38_NEON 260f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic uvec8 kShuf38 = 26164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; 262f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic uvec8 kShuf38_2 = 26364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 }; 264f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec16 kMult38_Div6 = 26564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 26664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; 267f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec16 kMult38_Div9 = 26864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 26964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; 
27064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 27164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32 -> 12 27264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.comvoid ScaleRowDown38_NEON(const uint8* src_ptr, 2731f923e3ea6de7afd9380c73f60a2f3e7b0588811fbarchard@google.com ptrdiff_t src_stride, 27464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 27564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 2764e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(3) 277c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {q3}, [%3] \n" 278c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 27931d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 2804e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 281c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vld1.8 {d0, d1, d2, d3}, [%0]! \n" 282c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "subs %2, %2, #12 \n" 283c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" 284c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" 2854e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 286c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst1.8 {d4}, [%1]! \n" 2874e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 288c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "vst1.32 {d5[0]}, [%1]! 
\n" 289c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "bgt 1b \n" 2908e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "+r"(src_ptr), // %0 2918e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_ptr), // %1 2928e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com "+r"(dst_width) // %2 2938e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "r"(&kShuf38) // %3 2948e50e619dbf904099d2be5f42ae77ea06dd4c503fbarchard@google.com : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc" 29564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 29664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 29764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 29864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// 32x3 -> 12x1 2998ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.comvoid OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, 30064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ptrdiff_t src_stride, 30164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com uint8* dst_ptr, int dst_width) { 3024b7a04e86421f484f01da5262cded21431928574fbarchard@google.com const uint8* src_ptr1 = src_ptr + src_stride * 2; 3034b7a04e86421f484f01da5262cded21431928574fbarchard@google.com 30464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 305f67b426bdf3cb76514a056fa0207d2a47a8d8486fbarchard@google.com MEMACCESS(5) 3064b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld1.16 {q13}, [%5] \n" 307f67b426bdf3cb76514a056fa0207d2a47a8d8486fbarchard@google.com MEMACCESS(6) 3084b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld1.8 {q14}, [%6] \n" 309f67b426bdf3cb76514a056fa0207d2a47a8d8486fbarchard@google.com MEMACCESS(7) 3104b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld1.8 {q15}, [%7] \n" 311c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com "add %3, %0 \n" 
312c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 31331d05032f7354baa4e84e0efe3321798c61e73bdfbarchard@google.com "1: \n" 31464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 31564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00 40 01 41 02 42 03 43 31664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d1 = 10 50 11 51 12 52 13 53 31764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 20 60 21 61 22 62 23 63 31864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 30 70 31 71 32 72 33 73 3194e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(0) 3202c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" 3214e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(3) 3222c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%3]! \n" 323f67b426bdf3cb76514a056fa0207d2a47a8d8486fbarchard@google.com MEMACCESS(4) 3244b7a04e86421f484f01da5262cded21431928574fbarchard@google.com "vld4.8 {d16, d17, d18, d19}, [%4]! \n" 3258ffe78abd29e43ca344734af9fb0fc91067e7d1cfbarchard@google.com "subs %2, %2, #12 \n" 32664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Shuffle the input data around to get align the data 32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // so adjacent data can be added. 
0,1 - 2,3 - 4,5 - 6,7 32964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00 10 01 11 02 12 03 13 33064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d1 = 40 50 41 51 42 52 43 53 33164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d0, d1 \n" 33264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d4, d5 \n" 33364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d16, d17 \n" 33464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 33564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 20 30 21 31 22 32 23 33 33664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 60 70 61 71 62 72 63 73 33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d2, d3 \n" 33864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d6, d7 \n" 33964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u8 d18, d19 \n" 34064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 34164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d0 = 00+10 01+11 02+12 03+13 34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d2 = 40+50 41+51 42+52 43+53 34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q0, q0 \n" 34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q2, q2 \n" 34564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 q8, q8 \n" 34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d3 = 60+70 61+71 62+72 63+73 34864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d3, d3 \n" 34964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d7, d7 \n" 35064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vpaddl.u8 d19, d19 \n" 35164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 
35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // combine source lines 35364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q2 \n" 35464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q8 \n" 35564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 d4, d3, d7 \n" 35664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 d4, d19 \n" 35764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 35864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] 35964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // + s[6 + st * 1] + s[7 + st * 1] 36064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // + s[6 + st * 2] + s[7 + st * 2]) / 6 36164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrdmulh.s16 q2, q2, q13 \n" 36264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovn.u16 d4, q2 \n" 36364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 36464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Shuffle 2,3 reg around so that 2 can be added to the 36564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 0,1 reg and 3 can be added to the 4,5 reg. This 36664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // requires expanding from u8 to u16 as the 0,1 and 4,5 36764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // registers are already expanded. Then do transposes 36864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // to get aligned. 
36964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 37064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q1, d2 \n" 37164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q3, d6 \n" 37264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmovl.u8 q9, d18 \n" 37364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 37464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // combine source lines 37564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q1, q3 \n" 37664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q1, q9 \n" 37764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 37864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d4 = xx 20 xx 30 xx 22 xx 32 37964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d5 = xx 21 xx 31 xx 23 xx 33 38064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u32 d2, d3 \n" 38164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 38264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d4 = xx 20 xx 21 xx 22 xx 23 38364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // d5 = xx 30 xx 31 xx 32 xx 33 38464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtrn.u16 d2, d3 \n" 38564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 38664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // 0+1+2, 3+4+5 38764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vadd.u16 q0, q1 \n" 38864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 38964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Need to divide, but can't downshift as the the value 39064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // isn't a power of 2. So multiply by 65536 / n 39164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // and take the upper 16 bits. 
39264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vqrdmulh.s16 q0, q0, q15 \n" 39364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 39464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // Align for table lookup, vtbl requires registers to 39564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com // be adjacent 39664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u8 d2, d4 \n" 39764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 39864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtbl.u8 d3, {d0, d1, d2}, d28 \n" 39964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vtbl.u8 d4, {d0, d1, d2}, d29 \n" 40064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 4014e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 4022c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%1]! \n" 4034e5e44e21e511f20d1fa326ca369ba2458469d57fbarchard@google.com MEMACCESS(1) 4042c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.32 {d4[0]}, [%1]! 
\n"
    "bgt 1b \n"
  : "+r"(src_ptr), // %0
    "+r"(dst_ptr), // %1
    "+r"(dst_width), // %2
    "+r"(src_stride), // %3
    "+r"(src_ptr1) // %4
  : "r"(&kMult38_Div6), // %5
    "r"(&kShuf38_2), // %6
    "r"(&kMult38_Div9) // %7
  : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
  );
}

// 32x2 -> 12x1
// Box-filtered 3/8 horizontal downscale of two source rows.
// Each loop iteration reads 32 bytes from src_ptr and 32 bytes from
// src_ptr + src_stride, and writes 12 bytes to dst_ptr (8 from d3, then 4
// from d4[0]). dst_width is decremented by 12 per iteration and the loop
// repeats while the result is > 0.
//
// For every 8 source columns, three values are produced:
//   columns 0,1,2 of both rows (6 samples) summed, then scaled by q13
//   columns 3,4,5 of both rows (6 samples) summed, then scaled by q13
//   columns 6,7   of both rows (4 samples) summed, then rounding >> 2
// q13 is loaded from kMult38_Div6; q14 (d28/d29) is loaded from kShuf38_2
// and supplies the vtbl byte indices that pick the 12 output bytes.
//
// In the register diagrams below "cg" denotes source column c (0-7) of
// 8-column group g (0-3).
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
                               ptrdiff_t src_stride,
                               uint8* dst_ptr, int dst_width) {
  asm volatile (
    MEMACCESS(4)
    "vld1.16 {q13}, [%4] \n"
    MEMACCESS(5)
    "vld1.8 {q14}, [%5] \n"
    // turn the stride operand into a pointer to the second row
    "add %3, %0 \n"
    ".p2align 2 \n"
  "1: \n"

    // d0 = 00 40 01 41 02 42 03 43
    // d1 = 10 50 11 51 12 52 13 53
    // d2 = 20 60 21 61 22 62 23 63
    // d3 = 30 70 31 71 32 72 33 73
    MEMACCESS(0)
    "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
    // same layout for the second row in d4-d7
    MEMACCESS(3)
    "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
    "subs %2, %2, #12 \n"

    // Shuffle the input data around to align the data
    // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7
    // d0 = 00 10 01 11 02 12 03 13
    // d1 = 40 50 41 51 42 52 43 53
    "vtrn.u8 d0, d1 \n"
    "vtrn.u8 d4, d5 \n"

    // d2 = 20 30 21 31 22 32 23 33
    // d3 = 60 70 61 71 62 72 63 73
    "vtrn.u8 d2, d3 \n"
    "vtrn.u8 d6, d7 \n"

    // d0 = 00+10 01+11 02+12 03+13
    // d1 = 40+50 41+51 42+52 43+53
    "vpaddl.u8 q0, q0 \n"
    "vpaddl.u8 q2, q2 \n"

    // d3 = 60+70 61+71 62+72 63+73
    "vpaddl.u8 d3, d3 \n"
    "vpaddl.u8 d7, d7 \n"

    // combine source lines
    "vadd.u16 q0, q2 \n"
    "vadd.u16 d4, d3, d7 \n"

    // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4
    // (only the low 4 bytes of d4 come from the 6,7 sums; the upper
    // 4 bytes are narrowed from whatever d5 held)
    "vqrshrn.u16 d4, q2, #2 \n"

    // Shuffle 2,3 reg around so that 2 can be added to the
    // 0,1 reg and 3 can be added to the 4,5 reg. This
    // requires expanding from u8 to u16 as the 0,1 and 4,5
    // registers are already expanded. Then do transposes
    // to get aligned.
    // q1 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33
    "vmovl.u8 q1, d2 \n"
    "vmovl.u8 q3, d6 \n"

    // combine source lines
    "vadd.u16 q1, q3 \n"

    // d2 = xx 20 xx 30 xx 22 xx 32
    // d3 = xx 21 xx 31 xx 23 xx 33
    "vtrn.u32 d2, d3 \n"

    // d2 = xx 20 xx 21 xx 22 xx 23
    // d3 = xx 30 xx 31 xx 32 xx 33
    "vtrn.u16 d2, d3 \n"

    // 0+1+2, 3+4+5
    "vadd.u16 q0, q1 \n"

    // Need to divide, but can't downshift as the value
    // isn't a power of 2. So multiply by 65536 / n
    // and take the upper 16 bits.
    "vqrdmulh.s16 q0, q0, q13 \n"

    // Align for table lookup, vtbl requires registers to
    // be adjacent
    "vmov.u8 d2, d4 \n"

    "vtbl.u8 d3, {d0, d1, d2}, d28 \n"
    "vtbl.u8 d4, {d0, d1, d2}, d29 \n"

    // 8 + 4 = 12 output bytes
    MEMACCESS(1)
    "vst1.8 {d3}, [%1]! \n"
    MEMACCESS(1)
    "vst1.32 {d4[0]}, [%1]! \n"
    "bgt 1b \n"
  : "+r"(src_ptr), // %0
    "+r"(dst_ptr), // %1
    "+r"(dst_width), // %2
    "+r"(src_stride) // %3
  : "r"(&kMult38_Div6), // %4
    "r"(&kShuf38_2) // %5
  : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"
  );
}

// 16x2 -> 16x1
// Blends the row at src_ptr with the row at src_ptr + src_stride into
// dst_ptr, 16 bytes per loop iteration. dst_width is decremented by 16 per
// iteration and the loop repeats while the result is > 0.
//
// source_y_fraction selects the code path:
//   0     - copy the first row unchanged (the second row is never read).
//   64    - two rounding halving adds, weighting the first row ~75%.
//   128   - one rounding halving add (50 / 50).
//   192   - two rounding halving adds, weighting the second row ~75%.
//   other - (row0 * (256 - f) + row1 * f), rounding shift right by 8.
//
// After the loop, one additional byte (a copy of the last output byte) is
// stored at the address immediately following the last 16 byte store, so
// the destination must have room for it.
void ScaleFilterRows_NEON(uint8* dst_ptr,
                          const uint8* src_ptr, ptrdiff_t src_stride,
                          int dst_width, int source_y_fraction) {
  asm volatile (
    "cmp %4, #0 \n"
    "beq 100f \n"
    // turn the stride operand into a pointer to the second row
    "add %2, %1 \n"
    "cmp %4, #64 \n"
    "beq 75f \n"
    "cmp %4, #128 \n"
    "beq 50f \n"
    "cmp %4, #192 \n"
    "beq 25f \n"

    // d5 = fraction (weight of second row), d4 = 256 - fraction
    "vdup.8 d5, %4 \n"
    "rsb %4, #256 \n"
    "vdup.8 d4, %4 \n"
    // General purpose row blend.
  "1: \n"
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"
    MEMACCESS(2)
    "vld1.8 {q1}, [%2]! \n"
    "subs %3, %3, #16 \n"
    "vmull.u8 q13, d0, d4 \n"
    "vmull.u8 q14, d1, d4 \n"
    "vmlal.u8 q13, d2, d5 \n"
    "vmlal.u8 q14, d3, d5 \n"
    "vrshrn.u16 d0, q13, #8 \n"
    "vrshrn.u16 d1, q14, #8 \n"
    MEMACCESS(0)
    "vst1.8 {q0}, [%0]! \n"
    "bgt 1b \n"
    "b 99f \n"

    // Blend 25 / 75.
  "25: \n"
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"
    MEMACCESS(2)
    "vld1.8 {q1}, [%2]! \n"
    "subs %3, %3, #16 \n"
    "vrhadd.u8 q0, q1 \n"
    "vrhadd.u8 q0, q1 \n"
    MEMACCESS(0)
    "vst1.8 {q0}, [%0]! \n"
    "bgt 25b \n"
    "b 99f \n"

    // Blend 50 / 50.
  "50: \n"
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"
    MEMACCESS(2)
    "vld1.8 {q1}, [%2]! \n"
    "subs %3, %3, #16 \n"
    "vrhadd.u8 q0, q1 \n"
    MEMACCESS(0)
    "vst1.8 {q0}, [%0]! \n"
    "bgt 50b \n"
    "b 99f \n"

    // Blend 75 / 25.
    // Same as the 25 / 75 loop with the two rows loaded into swapped
    // registers.
  "75: \n"
    MEMACCESS(1)
    "vld1.8 {q1}, [%1]! \n"
    MEMACCESS(2)
    "vld1.8 {q0}, [%2]! \n"
    "subs %3, %3, #16 \n"
    "vrhadd.u8 q0, q1 \n"
    "vrhadd.u8 q0, q1 \n"
    MEMACCESS(0)
    "vst1.8 {q0}, [%0]! \n"
    "bgt 75b \n"
    "b 99f \n"

    // Blend 100 / 0 - Copy row unchanged.
  "100: \n"
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"
    "subs %3, %3, #16 \n"
    MEMACCESS(0)
    "vst1.8 {q0}, [%0]! \n"
    "bgt 100b \n"

    // Replicate the last output byte (top lane of q0) one position past
    // the data written by the loop.
  "99: \n"
    MEMACCESS(0)
    "vst1.8 {d1[7]}, [%0] \n"
  : "+r"(dst_ptr), // %0
    "+r"(src_ptr), // %1
    "+r"(src_stride), // %2
    "+r"(dst_width), // %3
    "+r"(source_y_fraction) // %4
  :
  : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"
  );
}

// Point-samples a row of 32 bit pixels by 2, keeping the odd pixels.
// Each loop iteration reads 16 pixels (64 bytes) from src_ptr and writes
// 8 pixels (32 bytes) to dst. dst_width is decremented by 8 per iteration
// and the loop repeats while the result is > 0.
// src_stride is not referenced by the assembly.
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst, int dst_width) {
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    // load even pixels into q0, odd into q1
    MEMACCESS(0)
    "vld2.32 {q0, q1}, [%0]! \n"
    // next 8 pixels: even into q2, odd into q3
    MEMACCESS(0)
    "vld2.32 {q2, q3}, [%0]! \n"
    "subs %2, %2, #8 \n" // 8 processed per loop
    MEMACCESS(1)
    "vst1.8 {q1}, [%1]! \n" // store odd pixels
    MEMACCESS(1)
    "vst1.8 {q3}, [%1]! \n"
    "bgt 1b \n"
  : "+r"(src_ptr), // %0
    "+r"(dst), // %1
    "+r"(dst_width) // %2
  :
  : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
  );
}

// Box-filters 2x2 blocks of 4 byte pixels down to one pixel.
// Each loop iteration reads 16 pixels from src_ptr and 16 pixels from
// src_ptr + src_stride, and writes 8 pixels to dst. Per channel the four
// samples of a block are summed and then rounded and shifted right by 2.
// dst_width is decremented by 8 per iteration and the loop repeats while
// the result is > 0.
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                               uint8* dst, int dst_width) {
  asm volatile (
    // change the stride to row 2 pointer
    "add %1, %1, %0 \n"
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
    MEMACCESS(0)
    "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
    "subs %3, %3, #8 \n" // 8 processed per loop.
    "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
    "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
    MEMACCESS(1)
    "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
    MEMACCESS(1)
    "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
    // pairwise add the second row and accumulate onto the first row sums
    "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
    "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
    "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts.
    "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts.
    "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
    "vrshrn.u16 d1, q1, #2 \n"
    "vrshrn.u16 d2, q2, #2 \n"
    "vrshrn.u16 d3, q3, #2 \n"
    // re-interleave the four channels into 8 output pixels
    MEMACCESS(2)
    "vst4.8 {d0, d1, d2, d3}, [%2]! \n"
    "bgt 1b \n"
  : "+r"(src_ptr), // %0
    "+r"(src_stride), // %1
    "+r"(dst), // %2
    "+r"(dst_width) // %3
  :
  : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
  );
}

// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
// Point-samples every src_stepx-th 4 byte pixel of a row.
// Each loop iteration gathers 4 pixels from src_argb, advancing the source
// pointer by src_stepx * 4 bytes after each one, and writes them as one
// 16 byte store to dst_argb. dst_width is decremented by 4 per iteration
// and the loop repeats while the result is > 0.
// src_stride is not referenced by the assembly.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
                               int src_stepx, uint8* dst_argb, int dst_width) {
  asm volatile (
    // r12 = source step in bytes (src_stepx * 4)
    "mov r12, %3, lsl #2 \n"
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.32 {d0[0]}, [%0], r12 \n"
    MEMACCESS(0)
    "vld1.32 {d0[1]}, [%0], r12 \n"
    MEMACCESS(0)
    "vld1.32 {d1[0]}, [%0], r12 \n"
    MEMACCESS(0)
    "vld1.32 {d1[1]}, [%0], r12 \n"
    "subs %2, %2, #4 \n" // 4 pixels per loop.
    MEMACCESS(1)
    "vst1.8 {q0}, [%1]! \n"
    "bgt 1b \n"
  : "+r"(src_argb), // %0
    "+r"(dst_argb), // %1
    "+r"(dst_width) // %2
  : "r"(src_stepx) // %3
  : "memory", "cc", "r12", "q0"
  );
}

// Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned.
// Box-filtered variant of the stepped point sampler: every output pixel is
// the rounded average of a 2x2 block. Each loop iteration loads four
// 8 byte (2 pixel) groups from src_argb and four from
// src_argb + src_stride, stepping both pointers by src_stepx * 4 bytes
// after each load, and writes 4 pixels to dst_argb. dst_width is
// decremented by 4 per iteration and the loop repeats while the result
// is > 0.
void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
                                  int src_stepx,
                                  uint8* dst_argb, int dst_width) {
  asm volatile (
    // r12 = source step in bytes (src_stepx * 4)
    "mov r12, %4, lsl #2 \n"
    // turn the stride operand into a pointer to the second row
    "add %1, %1, %0 \n"
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
    MEMACCESS(1)
    "vld1.8 {d1}, [%1], r12 \n"
    MEMACCESS(0)
    "vld1.8 {d2}, [%0], r12 \n"
    MEMACCESS(1)
    "vld1.8 {d3}, [%1], r12 \n"
    MEMACCESS(0)
    "vld1.8 {d4}, [%0], r12 \n"
    MEMACCESS(1)
    "vld1.8 {d5}, [%1], r12 \n"
    MEMACCESS(0)
    "vld1.8 {d6}, [%0], r12 \n"
    MEMACCESS(1)
    "vld1.8 {d7}, [%1], r12 \n"
    // vertical sums, widened to 16 bits: one q register per 2x2 block
    "vaddl.u8 q0, d0, d1 \n"
    "vaddl.u8 q1, d2, d3 \n"
    "vaddl.u8 q2, d4, d5 \n"
    "vaddl.u8 q3, d6, d7 \n"
    // regroup so left and right columns of each block sit in matching lanes
    "vswp.8 d1, d2 \n" // ab_cd -> ac_bd
    "vswp.8 d5, d6 \n" // ef_gh -> eg_fh
    "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d)
    "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h)
    "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
    "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
    "subs %3, %3, #4 \n" // 4 pixels per loop.
    MEMACCESS(2)
    "vst1.8 {q0}, [%2]! \n"
    "bgt 1b \n"
  : "+r"(src_argb), // %0
    "+r"(src_stride), // %1
    "+r"(dst_argb), // %2
    "+r"(dst_width) // %3
  : "r"(src_stepx) // %4
  : "memory", "cc", "r12", "q0", "q1", "q2", "q3"
  );
}

#endif // __ARM_NEON__

#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif