133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp/* 233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Copyright 2011 The LibYuv Project Authors. All rights reserved. 333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * 433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * Use of this source code is governed by a BSD-style license 533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * that can be found in the LICENSE file in the root of the source 633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * tree. An additional intellectual property rights grant can be found 733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * in the file PATENTS. All contributing project authors may 833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp * be found in the AUTHORS file in the root of the source tree. 933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp */ 1033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#include "libyuv/row.h" 1233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 1433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampnamespace libyuv { 1533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampextern "C" { 1633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 1733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 1833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// This module is for GCC Neon 1933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) 2033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 8 Y, 4 U and 4 V from 422 2233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READYUV422 \ 2333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d0}, [%0]! \n" \ 2433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u32 {d2[0]}, [%1]! \n" \ 2533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u32 {d2[1]}, [%2]! \n" 2633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 2733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 8 Y and 4 UV from NV12 2833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READNV12 \ 2933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d0}, [%0]! \n" \ 3033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d2}, [%1]! \n" \ 3133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 3233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vuzp.u8 d2, d3 \n" \ 3333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vtrn.u32 d2, d3 \n" \ 3433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 3533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Read 8 Y and 4 VU from NV21 3633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define READNV21 \ 3733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d0}, [%0]! \n" \ 3833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d2}, [%1]! \n" \ 3933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 4033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vuzp.u8 d3, d2 \n" \ 4133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vtrn.u32 d2, d3 \n" \ 4233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 4333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#define YUV422TORGB \ 4433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\ 4533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\ 4633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmull.s8 q9, d2, d25 \n"/* u/v G component */\ 4733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d1, #0 \n"/* split odd/even y apart */\ 4833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vtrn.u8 d0, d1 \n" \ 4933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vsub.s16 q0, q0, q15 \n"/* offset y */\ 5033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmul.s16 q0, q0, q14 \n" \ 5133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vadd.s16 d18, d19 \n" \ 5233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqadd.s16 d20, d0, d16 \n" \ 5333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqadd.s16 d21, d1, d16 \n" \ 5433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqadd.s16 d22, d0, d17 \n" \ 5533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqadd.s16 d23, d1, d17 \n" \ 5633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqadd.s16 d16, d0, d18 \n" \ 5733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqadd.s16 d17, d1, d18 \n" \ 5833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqrshrun.s16 d0, q10, #6 \n" \ 5933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqrshrun.s16 d1, q11, #6 \n" \ 6033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vqrshrun.s16 d2, q8, #6 \n" \ 6133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\ 6233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmovl.u8 q11, d1 \n" \ 6333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmovl.u8 q8, d2 \n" \ 6433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vtrn.u8 d20, d21 \n" \ 6533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vtrn.u8 d22, d23 \n" \ 6633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vtrn.u8 d16, d17 \n" \ 6733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d21, d16 \n" 6833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 6933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#if defined(HAS_I422TOARGBROW_NEON) || defined(HAS_I422TOBGRAROW_NEON) || \ 7033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp defined(HAS_I422TOABGRROW_NEON) || defined(HAS_I422TORGBAROW_NEON) 7133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic const vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102, 7233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 0, 0, 0, 0, 0, 0, 0, 0 }; 7333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampstatic const vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52, 7433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 0, 0, 0, 0, 0, 0, 0, 0 }; 7533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 7633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 7733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TOARGBROW_NEON 7833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I422ToARGBRow_NEON(const uint8* y_buf, 7933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 8033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 8133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 8233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 8333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 8433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%5] \n" 8533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%6] \n" 8633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 8733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 8833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 8933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 9033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 9133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 9233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 9333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #8 \n" 9433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d23, #255 \n" 9533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 9633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 9733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 9833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(u_buf), // %1 9933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(v_buf), // %2 10033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %3 10133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %4 10233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %5 10333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %6 10433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 10533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 10633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 10733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 10833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TOARGBROW_NEON 10933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 11033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TOBGRAROW_NEON 11133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I422ToBGRARow_NEON(const uint8* y_buf, 11233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 11333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 11433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 11533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 11633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 11733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%5] \n" 11833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%6] \n" 11933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 12033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 12133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 12233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 12333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 12433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 12533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 12633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #8 \n" 12733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d20, d22 \n" 12833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d19, #255 \n" 12933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d19, d20, d21, d22}, [%3]! \n" 13033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 13133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 13233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(u_buf), // %1 13333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(v_buf), // %2 13433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %3 13533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %4 13633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %5 13733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %6 13833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 13933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 14033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 14133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 14233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TOBGRAROW_NEON 14333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 14433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TOABGRROW_NEON 14533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I422ToABGRRow_NEON(const uint8* y_buf, 14633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 14733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 14833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 14933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 15033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 15133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%5] \n" 15233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%6] \n" 15333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 15433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 15533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 15633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 15733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 15833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 15933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 16033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #8 \n" 16133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d20, d22 \n" 16233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d23, #255 \n" 16333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 16433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 16533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 16633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(u_buf), // %1 16733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(v_buf), // %2 16833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %3 16933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %4 17033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %5 17133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %6 17233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 17333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 17433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 17533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 17633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TOABGRROW_NEON 17733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 17833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TORGBAROW_NEON 17933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I422ToRGBARow_NEON(const uint8* y_buf, 18033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 18133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 18233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 18333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 18433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 18533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%5] \n" 18633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%6] \n" 18733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 18833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 18933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 19033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 19133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 19233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 19333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 19433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #8 \n" 19533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d19, #255 \n" 19633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d19, d20, d21, d22}, [%3]! \n" 19733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 19833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 19933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(u_buf), // %1 20033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(v_buf), // %2 20133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %3 20233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %4 20333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %5 20433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %6 20533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 20633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 20733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 20833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 20933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TORGBAROW_NEON 21033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 21133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TORGB24ROW_NEON 21233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I422ToRGB24Row_NEON(const uint8* y_buf, 21333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 21433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 21533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 21633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 21733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 21833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%5] \n" 21933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%6] \n" 22033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 22133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 22233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 22333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 22433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 22533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 22633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 22733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #8 \n" 22833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst3.8 {d20, d21, d22}, [%3]! \n" 22933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 23033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 23133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(u_buf), // %1 23233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(v_buf), // %2 23333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %3 23433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %4 23533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %5 23633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %6 23733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 23833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 23933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 24033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 24133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TORGB24ROW_NEON 24233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 24333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_I422TORAWROW_NEON 24433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid I422ToRAWRow_NEON(const uint8* y_buf, 24533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* u_buf, 24633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* v_buf, 24733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 24833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 24933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 25033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%5] \n" 25133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%6] \n" 25233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 25333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 25433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 25533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 25633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 25733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READYUV422 25833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 25933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #8 \n" 26033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d20, d22 \n" 26133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst3.8 {d20, d21, d22}, [%3]! \n" 26233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 26333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 26433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(u_buf), // %1 26533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(v_buf), // %2 26633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %3 26733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %4 26833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %5 26933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %6 27033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 27133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 27233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 27333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 27433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_I422TORAWROW_NEON 27533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 27633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_NV12TOARGBROW_NEON 27733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid NV12ToARGBRow_NEON(const uint8* y_buf, 27833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* uv_buf, 27933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 28033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 28133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 28233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%4] \n" 28333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%5] \n" 28433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 28533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 28633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 28733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 28833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 28933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READNV12 29033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 29133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, %3, #8 \n" 29233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d23, #255 \n" 29333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d20, d21, d22, d23}, [%2]! \n" 29433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 29533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 29633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(uv_buf), // %1 29733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %2 29833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 29933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %4 30033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %5 30133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 30233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 30333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 30433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 30533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_NV12TOARGBROW_NEON 30633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 30733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_NV21TOARGBROW_NEON 30833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid NV21ToARGBRow_NEON(const uint8* y_buf, 30933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp const uint8* uv_buf, 31033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* rgb_buf, 31133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int width) { 31233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 31333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d24}, [%4] \n" 31433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.u8 {d25}, [%5] \n" 31533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d26, #128 \n" 31633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q14, #74 \n" 31733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u16 q15, #16 \n" 31833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 31933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 32033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp READNV21 32133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp YUV422TORGB 32233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, %3, #8 \n" 32333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d23, #255 \n" 32433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d20, d21, d22, d23}, [%2]! \n" 32533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 32633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(y_buf), // %0 32733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(uv_buf), // %1 32833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(rgb_buf), // %2 32933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 33033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(&kUVToRB), // %4 33133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "r"(&kUVToG) // %5 33233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "cc", "memory", "q0", "q1", "q2", "q3", 33333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 33433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 33533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 33633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_NV21TOARGBROW_NEON 33733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 33833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_SPLITUV_NEON 33933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v 34033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. 34133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 34233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 34333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 34433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 34533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld2.u8 {q0, q1}, [%0]! \n" // load 16 pairs of UV 34633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, %3, #16 \n" // 16 processed per loop 34733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {q0}, [%1]! \n" // store U 34833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {q1}, [%2]! \n" // Store V 34933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 35033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uv), // %0 35133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 35233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 35333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 // Output registers 35433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : // Input registers 35533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "q0", "q1" // Clobber List 35633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 35733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 35833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_SPLITUV_NEON 35933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 36033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_COPYROW_NEON 36133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// Copy multiple of 64 36233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid CopyRow_NEON(const uint8* src, uint8* dst, int count) { 36333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 36433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 36533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 36633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vldm %0!, {q0, q1, q2, q3} \n" // load 64 36733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #64 \n" // 64 processed per loop 36833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vstm %1!, {q0, q1, q2, q3} \n" // store 64 36933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 37033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 37133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 37233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(count) // %2 // Output registers 37333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : // Input registers 37433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List 37533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 37633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 37733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_COPYROW_NEON 37833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 37933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_SETROW_NEON 38033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// SetRow8 writes 'count' bytes using a 32 bit value repeated. 38133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid SetRow8_NEON(uint8* dst, uint32 v32, int count) { 38233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( // NOLINT 38333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vdup.u32 q0, %2 \n" // duplicate 4 ints 38433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 38533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %1, %1, #16 \n" // 16 bytes per loop 38633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u32 {q0}, [%0]! \n" // store 38733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 38833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(dst), // %0 38933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(count) // %1 39033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "r"(v32) // %2 39133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "q0", "memory", "cc"); 39233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 39333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 39433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// TODO(fbarchard): Make fully assembler 39533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// SetRow32 writes 'count' words using a 32 bit value repeated. 39633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid SetRows32_NEON(uint8* dst, uint32 v32, int width, 39733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int dst_stride, int height) { 39833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp for (int y = 0; y < height; ++y) { 39933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp SetRow8_NEON(dst, v32, width << 2); 40033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp dst += dst_stride; 40133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp } 40233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 40333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_SETROW_NEON 40433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 40533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_MIRRORROW_NEON 40633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) { 40733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 40833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // compute where to start writing destination 40933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %1, %2 \n" 41033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // work on segments that are multiples of 16 41133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lsrs r3, %2, #4 \n" 41233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the output is written in two block. 8 bytes followed 41333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // by another 8. reading is done sequentially, from left to 41433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // right. writing is done from right to left in block sizes 41533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // %1, the destination pointer is incremented after writing 41633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the first of the two blocks. need to subtract that 8 off 41733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // along with 16 to get the next location. 41833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov r3, #-24 \n" 41933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "beq 2f \n" 42033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 42133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // back of destination by the size of the register that is 42233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // going to be mirrored 42333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1, #16 \n" 42433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the loop needs to run on blocks of 16. what will be left 42533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // over is either a negative number, the residuals that need 42633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // to be done, or 0. If this isn't subtracted off here the 42733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // loop will run one extra time. 42833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %2, #16 \n" 42933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 43033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // mirror the bytes in the 64 bit segments. unable to mirror 43133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the bytes in the entire 128 bits in one go. 43233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // because of the inability to mirror the entire 128 bits 43333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // mirror the writing out of the two 64 bit segments. 43433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 43533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 43633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.8 {q0}, [%0]! \n" // src += 16 43733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, #16 \n" 43833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vrev64.8 q0, q0 \n" 43933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d1}, [%1]! \n" 44033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d0}, [%1], r3 \n" // dst -= 16 44133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bge 1b \n" 44233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 44333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // add 16 back to the counter. if the result is 0 there is no 44433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // residuals so jump past 44533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "adds %2, #16 \n" 44633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "beq 5f \n" 44733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %1, #16 \n" 44833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "2: \n" 44933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov r3, #-3 \n" 45033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1, #2 \n" 45133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, #2 \n" 45233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // check for 16*n+1 scenarios where segments_of_2 should not 45333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // be run, but there is something left over. 45433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "blt 4f \n" 45533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 45633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// do this in neon registers as per 45733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp// http://blogs.arm.com/software-enablement/196-coding-for-neon-part-2-dealing-with-leftovers/ 45833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "3: \n" 45933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld2.8 {d0[0], d1[0]}, [%0]! \n" // src += 2 46033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, #2 \n" 46133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d1[0]}, [%1]! \n" 46233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d0[0]}, [%1], r3 \n" // dst -= 2 46333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bge 3b \n" 46433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 46533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "adds %2, #2 \n" 46633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "beq 5f \n" 46733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "4: \n" 46833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %1, #1 \n" 46933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.8 {d0[0]}, [%0] \n" 47033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d0[0]}, [%1] \n" 47133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "5: \n" 47233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 47333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst), // %1 47433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %2 47533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 47633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "r3", "q0" 47733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 47833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 47933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_MIRRORROW_NEON 48033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 48133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_MIRRORROWUV_NEON 48233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { 48333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 48433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // compute where to start writing destination 48533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %1, %3 \n" // dst_a + width 48633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %2, %3 \n" // dst_b + width 48733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // work on input segments that are multiples of 16, but 48833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // width that has been passed is output segments, half 48933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the size of input. 49033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "lsrs r12, %3, #3 \n" 49133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "beq 2f \n" 49233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the output is written in to two blocks. 49333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov r12, #-8 \n" 49433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // back of destination by the size of the register that is 49533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // going to be mirrord 49633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1, #8 \n" 49733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %2, #8 \n" 49833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // the loop needs to run on blocks of 8. what will be left 49933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // over is either a negative number, the residuals that need 50033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // to be done, or 0. if this isn't subtracted off here the 50133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // loop will run one extra time. 50233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %3, #8 \n" 50333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 50433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // mirror the bytes in the 64 bit segments 50533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 50633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 50733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld2.8 {d0, d1}, [%0]! \n" // src += 16 50833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, #8 \n" 50933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vrev64.8 q0, q0 \n" 51033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d0}, [%1], r12 \n" // dst_a -= 8 51133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d1}, [%2], r12 \n" // dst_b -= 8 51233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bge 1b \n" 51333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 51433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // add 8 back to the counter. if the result is 0 there is no 51533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp // residuals so return 51633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "adds %3, #8 \n" 51733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "beq 4f \n" 51833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %1, #8 \n" 51933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "add %2, #8 \n" 52033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "2: \n" 52133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "mov r12, #-1 \n" 52233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %1, #1 \n" 52333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "sub %2, #1 \n" 52433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "3: \n" 52533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld2.8 {d0[0], d1[0]}, [%0]! \n" // src += 2 52633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, %3, #1 \n" 52733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d0[0]}, [%1], r12 \n" // dst_a -= 1 52833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.8 {d1[0]}, [%2], r12 \n" // dst_b -= 1 52933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 3b \n" 53033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "4: \n" 53133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src), // %0 53233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_a), // %1 53333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_b), // %2 53433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(width) // %3 53533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 53633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "r12", "q0" 53733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 53833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 53933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_MIRRORROWUV_NEON 54033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 54133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_BGRATOARGBROW_NEON 54233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix) { 54333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 54433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 54533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 54633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. 54733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 54833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d1, d2 \n" // swap G, R 54933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d0, d3 \n" // swap B, A 55033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 55133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 55233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_bgra), // %0 55333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 55433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 55533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 55633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3" // Clobber List 55733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 55833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 55933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_BGRATOARGBROW_NEON 56033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 56133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ABGRTOARGBROW_NEON 56233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ABGRToARGBRow_NEON(const uint8* src_abgr, uint8* dst_argb, int pix) { 56333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 56433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 56533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 56633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. 56733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 56833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d0, d2 \n" // swap R, B 56933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 57033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 57133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_abgr), // %0 57233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 57333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 57433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 57533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3" // Clobber List 57633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 57733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 57833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ABGRTOARGBROW_NEON 57933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 58033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_RGBATOARGBROW_NEON 58133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid RGBAToARGBRow_NEON(const uint8* src_rgba, uint8* dst_argb, int pix) { 58233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 58333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 58433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 58533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. 58633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 58733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d4, d0 \n" // move A after RGB 58833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 58933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 59033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_rgba), // %0 59133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 59233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 59333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 59433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3", "d4" // Clobber List 59533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 59633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 59733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_RGBATOARGBROW_NEON 59833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 59933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_RGB24TOARGBROW_NEON 60033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) { 60133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 60233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d4, #255 \n" // Alpha 60333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 60433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 60533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. 60633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 60733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 60833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 60933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_rgb24), // %0 61033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 61133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 61233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 61333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d1", "d2", "d3", "d4" // Clobber List 61433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 61533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 61633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_RGB24TOARGBROW_NEON 61733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 61833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_RAWTOARGBROW_NEON 61933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { 62033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 62133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d4, #255 \n" // Alpha 62233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 62333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 62433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. 62533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 62633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d1, d3 \n" // swap R, B 62733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 62833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 62933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_raw), // %0 63033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_argb), // %1 63133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 63233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 63333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d1", "d2", "d3", "d4" // Clobber List 63433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 63533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 63633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_RAWTOARGBROW_NEON 63733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 63833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBTORGBAROW_NEON 63933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) { 64033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 64133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 64233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 64333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 64433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 64533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vmov.u8 d0, d4 \n" // move A before RGB. 64633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of RGBA. 64733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 64833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb), // %0 64933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_rgba), // %1 65033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 65133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 65233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3", "d4" // Clobber List 65333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 65433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 65533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBTORGBAROW_NEON 65633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 65733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBTORGB24ROW_NEON 65833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) { 65933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 66033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 66133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 66233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 66333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 66433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24. 66533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 66633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb), // %0 66733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_rgb24), // %1 66833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 66933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 67033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d1", "d2", "d3", "d4" // Clobber List 67133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 67233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 67333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBTORGB24ROW_NEON 67433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 67533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_ARGBTORAWROW_NEON 67633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { 67733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 67833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 67933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 68033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 68133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #8 \n" // 8 processed per loop. 68233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vswp.u8 d1, d3 \n" // swap R, B 68333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. 68433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 68533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_argb), // %0 68633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_raw), // %1 68733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 68833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 68933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d1", "d2", "d3", "d4" // Clobber List 69033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 69133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 69233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_ARGBTORAWROW_NEON 69333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 69433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_YUY2TOYROW_NEON 69533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { 69633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 69733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 69833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 69933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld2.u8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. 70033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #16 \n" // 16 processed per loop. 70133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {q0}, [%1]! \n" // store 16 pixels of Y. 70233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 70333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 70433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 70533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 70633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 70733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "q0", "q1" // Clobber List 70833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 70933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 71033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_YUY2TOYROW_NEON 71133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 71233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_UYVYTOYROW_NEON 71333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { 71433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 71533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 71633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 71733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld2.u8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. 71833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %2, %2, #16 \n" // 16 processed per loop. 71933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {q1}, [%1]! \n" // store 16 pixels of Y. 72033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 72133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 72233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_y), // %1 72333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %2 72433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 72533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "q0", "q1" // Clobber List 72633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 72733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 72833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_UYVYTOYROW_NEON 72933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 73033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_YUY2TOYROW_NEON 73133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, 73233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int pix) { 73333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 73433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 73533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 73633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. 73733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 73833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d1}, [%1]! \n" // store 8 U. 73933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d3}, [%2]! \n" // store 8 V. 74033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 74133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 74233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 74333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 74433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 74533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 74633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3" // Clobber List 74733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 74833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 74933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_YUY2TOYROW_NEON 75033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 75133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_UYVYTOYROW_NEON 75233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, 75333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp int pix) { 75433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 75533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 75633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 75733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. 75833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 75933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d0}, [%1]! \n" // store 8 U. 76033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d2}, [%2]! \n" // store 8 V. 76133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 76233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 76333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %1 76433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %2 76533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %3 76633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 76733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3" // Clobber List 76833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 76933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 77033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_UYVYTOYROW_NEON 77133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 77233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_YUY2TOYROW_NEON 77333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, 77433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 77533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 77633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "adds %1, %0, %1 \n" // stride + src_yuy2 77733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 77833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 77933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. 78033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 78133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. 78233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vrhadd.u8 d1, d1, d5 \n" // average rows of U 78333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vrhadd.u8 d3, d3, d7 \n" // average rows of V 78433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d1}, [%2]! \n" // store 8 U. 78533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d3}, [%3]! \n" // store 8 V. 78633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 78733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_yuy2), // %0 78833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(stride_yuy2), // %1 78933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %2 79033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %3 79133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %4 79233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 79333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List 79433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 79533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 79633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_YUY2TOYROW_NEON 79733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 79833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef HAS_UYVYTOYROW_NEON 79933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkampvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, 80033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp uint8* dst_u, uint8* dst_v, int pix) { 80133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp asm volatile ( 80233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "adds %1, %0, %1 \n" // stride + src_uyvy 80333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ".p2align 2 \n" 80433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "1: \n" 80533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. 80633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 80733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY. 80833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vrhadd.u8 d0, d0, d4 \n" // average rows of U 80933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vrhadd.u8 d2, d2, d6 \n" // average rows of V 81033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d0}, [%2]! \n" // store 8 U. 81133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "vst1.u8 {d2}, [%3]! \n" // store 8 V. 81233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "bgt 1b \n" 81333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "+r"(src_uyvy), // %0 81433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(stride_uyvy), // %1 81533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_u), // %2 81633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(dst_v), // %3 81733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp "+r"(pix) // %4 81833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : 81933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp : "memory", "cc", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List 82033cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp ); 82133cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} 82233cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // HAS_UYVYTOYROW_NEON 82333cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 82433cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif // __ARM_NEON__ 82533cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp 82633cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#ifdef __cplusplus 82733cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} // extern "C" 82833cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp} // namespace libyuv 82933cfdeb7b267ab635413797fffb046b73272f7ecHendrik Dahlkamp#endif 830