141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org/* 241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * Copyright 2011 The LibYuv Project Authors. All rights reserved. 341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * 441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * Use of this source code is governed by a BSD-style license 541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * that can be found in the LICENSE file in the root of the source 641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * tree. An additional intellectual property rights grant can be found 741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * in the file PATENTS. All contributing project authors may 841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org * be found in the AUTHORS file in the root of the source tree. 941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org */ 1041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 1141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#include "libyuv/row.h" 1241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 1341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef __cplusplus 1441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgnamespace libyuv { 1541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgextern "C" { 1641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif 1741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 1841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// This module is for GCC Neon 1941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 2041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 2141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 Y, 4 U and 4 V 
from 422 2241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READYUV422 \ 2341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 2441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d0}, [%0]! \n" \ 2541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) \ 2641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.32 {d2[0]}, [%1]! \n" \ 2741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) \ 2841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.32 {d2[1]}, [%2]! \n" 2941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 3041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 Y, 2 U and 2 V from 422 3141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READYUV411 \ 3241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 3341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d0}, [%0]! \n" \ 3441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) \ 3541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.16 {d2[0]}, [%1]! \n" \ 3641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) \ 3741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.16 {d2[1]}, [%2]! 
\n" \ 3841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, d2 \n" \ 3941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vzip.u8 d2, d3 \n" 4041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 4141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 Y, 8 U and 8 V from 444 4241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READYUV444 \ 4341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 4441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d0}, [%0]! \n" \ 4541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) \ 4641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d2}, [%1]! \n" \ 4741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) \ 4841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d3}, [%2]! \n" \ 4941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q1 \n" \ 5041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshrn.u16 d2, q1, #1 \n" 5141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 5241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 Y, and set 4 U and 4 V to 128 5341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READYUV400 \ 5441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 5541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d0}, [%0]! 
\n" \ 5641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d2, #128 \n" 5741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 5841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 Y and 4 UV from NV12 5941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READNV12 \ 6041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 6141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d0}, [%0]! \n" \ 6241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) \ 6341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d2}, [%1]! \n" \ 6441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 6541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d2, d3 \n" \ 6641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u32 d2, d3 \n" 6741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 6841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 Y and 4 VU from NV21 6941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READNV21 \ 7041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 7141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d0}, [%0]! \n" \ 7241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) \ 7341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d2}, [%1]! 
\n" \ 7441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 7541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d3, d2 \n" \ 7641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u32 d2, d3 \n" 7741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 7841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 YUY2 7941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READYUY2 \ 8041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 8141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld2.8 {d0, d2}, [%0]! \n" \ 8241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, d2 \n" \ 8341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d2, d3 \n" \ 8441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u32 d2, d3 \n" 8541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 8641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Read 8 UYVY 8741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define READUYVY \ 8841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) \ 8941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld2.8 {d2, d3}, [%0]! 
\n" \ 9041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d0, d3 \n" \ 9141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, d2 \n" \ 9241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d2, d3 \n" \ 9341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u32 d2, d3 \n" 9441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 9541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define YUV422TORGB \ 9641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\ 9741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\ 9841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.s8 q9, d2, d25 \n"/* u/v G component */\ 9941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d1, #0 \n"/* split odd/even y apart */\ 10041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u8 d0, d1 \n" \ 10141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vsub.s16 q0, q0, q15 \n"/* offset y */\ 10241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q0, q0, q14 \n" \ 10341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vadd.s16 d18, d19 \n" \ 10441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 d20, d0, d16 \n" /* B */ \ 10541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 d21, d1, d16 \n" \ 10641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 d22, d0, d17 \n" /* R */ \ 10741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 d23, d1, d17 \n" \ 10841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 d16, d0, d18 \n" /* G */ \ 10941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 d17, d1, d18 \n" 
\ 11041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d0, q10, #6 \n" /* B */ \ 11141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d1, q11, #6 \n" /* G */ \ 11241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d2, q8, #6 \n" /* R */ \ 11341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\ 11441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q11, d1 \n" \ 11541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q8, d2 \n" \ 11641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u8 d20, d21 \n" \ 11741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u8 d22, d23 \n" \ 11841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vtrn.u8 d16, d17 \n" \ 11941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d21, d16 \n" 12041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 12141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102, 12241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 0, 0, 0, 0, 0, 0, 0, 0 }; 12341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgstatic vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52, 12441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 0, 0, 0, 0, 0, 0, 0, 0 }; 12541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 12641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I444TOARGBROW_NEON 12741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I444ToARGBRow_NEON(const uint8* src_y, 12841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 12941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 
13041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 13141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 13241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 13341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 13441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 13541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 13641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 13741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 13841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 13941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 14041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 14141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 14241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV444 14341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 14441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 14541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 14641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 14741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 14841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 14941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 15041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 15141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 15241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %3 15341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 15441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 15541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 15641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 15741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 15841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 15941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 16041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I444TOARGBROW_NEON 16141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 16241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOARGBROW_NEON 16341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToARGBRow_NEON(const uint8* src_y, 16441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 16541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 16641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 16741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 16841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 16941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
17041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 17141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 17241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 17341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 17441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 17541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 17641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 17741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 17841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 17941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 18041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 18141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 18241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 18341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 18441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 18541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 18641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 18741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 18841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %3 18941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 19041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 19141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 19241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 19341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 19441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 19541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 19641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TOARGBROW_NEON 19741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 19841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I411TOARGBROW_NEON 19941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I411ToARGBRow_NEON(const uint8* src_y, 20041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 20141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 20241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 20341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 20441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 20541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
20641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 20741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 20841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 20941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 21041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 21141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 21241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 21341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 21441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV411 21541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 21641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 21741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 21841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 21941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 22041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 22141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 22241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 22341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 22441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %3 22541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 22641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 22741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 22841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 22941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 23041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 23141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 23241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I411TOARGBROW_NEON 23341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 23441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOBGRAROW_NEON 23541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToBGRARow_NEON(const uint8* src_y, 23641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 23741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 23841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_bgra, 23941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 24041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 24141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
24241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 24341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 24441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 24541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 24641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 24741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 24841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 24941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 25041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 25141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 25241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 25341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vswp.u8 d20, d22 \n" 25441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d19, #255 \n" 25541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 25641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d19, d20, d21, d22}, [%3]! 
\n" 25741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 25841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 25941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 26041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 26141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_bgra), // %3 26241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 26341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 26441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 26541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 26641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 26741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 26841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 26941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TOBGRAROW_NEON 27041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 27141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOABGRROW_NEON 27241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToABGRRow_NEON(const uint8* src_y, 27341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 27441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 27541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_abgr, 27641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 27741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 27841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
27941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 28041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 28141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 28241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 28341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 28441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 28541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 28641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 28741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 28841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 28941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 29041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vswp.u8 d20, d22 \n" 29141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 29241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 29341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 29441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 29541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 29641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 29741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 29841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_abgr), // %3 29941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 30041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 30141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 30241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 30341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 30441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 30541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 30641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TOABGRROW_NEON 30741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 30841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORGBAROW_NEON 30941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRGBARow_NEON(const uint8* src_y, 31041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 31141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 31241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_rgba, 31341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 31441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 31541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
31641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 31741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 31841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 31941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 32041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 32141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 32241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 32341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 32441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 32541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 32641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 32741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d19, #255 \n" 32841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 32941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d19, d20, d21, d22}, [%3]! 
\n" 33041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 33141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 33241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 33341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 33441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_rgba), // %3 33541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 33641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 33741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 33841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 33941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 34041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 34141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 34241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TORGBAROW_NEON 34341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 34441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORGB24ROW_NEON 34541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRGB24Row_NEON(const uint8* src_y, 34641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 34741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 34841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_rgb24, 34941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 35041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 35141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
35241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 35341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 35441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 35541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 35641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 35741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 35841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 35941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 36041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 36141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 36241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 36341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 36441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst3.8 {d20, d21, d22}, [%3]! 
\n" 36541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 36641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 36741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 36841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 36941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_rgb24), // %3 37041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 37141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 37241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 37341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 37441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 37541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 37641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 37741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TORGB24ROW_NEON 37841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 37941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORAWROW_NEON 38041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRAWRow_NEON(const uint8* src_y, 38141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 38241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 38341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_raw, 38441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 38541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 38641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
38741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 38841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 38941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 39041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 39141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 39241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 39341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 39441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 39541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 39641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 39741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 39841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vswp.u8 d20, d22 \n" 39941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 40041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst3.8 {d20, d21, d22}, [%3]! 
\n" 40141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 40241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 40341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 40441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 40541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_raw), // %3 40641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 40741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 40841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 40941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 41041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 41141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 41241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 41341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TORAWROW_NEON 41441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 41541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGBTORGB565 \ 41641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d20, d20, #3 \n" /* B */ \ 41741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d21, d21, #2 \n" /* G */ \ 41841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d22, d22, #3 \n" /* R */ \ 41941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q8, d20 \n" /* B */ \ 42041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q9, d21 \n" /* G */ \ 42141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q10, d22 \n" /* R */ \ 
42241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u16 q9, q9, #5 \n" /* G */ \ 42341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u16 q10, q10, #11 \n" /* R */ \ 42441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr q0, q8, q9 \n" /* BG */ \ 42541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr q0, q0, q10 \n" /* BGR */ 42641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 42741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TORGB565ROW_NEON 42841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToRGB565Row_NEON(const uint8* src_y, 42941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 43041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 43141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_rgb565, 43241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 43341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 43441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 43541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 43641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 43741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 43841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 43941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 44041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 44141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 44241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 44341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 
44441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 44541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 44641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGBTORGB565 44741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 44841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. 44941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 45041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 45141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 45241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 45341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_rgb565), // %3 45441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 45541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 45641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 45741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 45841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 45941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 46041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 46141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TORGB565ROW_NEON 46241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 46341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGBTOARGB1555 \ 46441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q10, q10, #3 \n" /* B */ \ 46541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d22, d22, #3 
\n" /* R */ \ 46641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d23, d23, #7 \n" /* A */ \ 46741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q8, d20 \n" /* B */ \ 46841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q9, d21 \n" /* G */ \ 46941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q10, d22 \n" /* R */ \ 47041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q11, d23 \n" /* A */ \ 47141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u16 q9, q9, #5 \n" /* G */ \ 47241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u16 q10, q10, #10 \n" /* R */ \ 47341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u16 q11, q11, #15 \n" /* A */ \ 47441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr q0, q8, q9 \n" /* BG */ \ 47541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr q1, q10, q11 \n" /* RA */ \ 47641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr q0, q0, q1 \n" /* BGRA */ 47741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 47841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOARGB1555ROW_NEON 47941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToARGB1555Row_NEON(const uint8* src_y, 48041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 48141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 48241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb1555, 48341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 48441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 48541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
48641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 48741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 48841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 48941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 49041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 49141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 49241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 49341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 49441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 49541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 49641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 49741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 49841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGBTOARGB1555 49941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 50041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555. 
50141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 50241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 50341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 50441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 50541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb1555), // %3 50641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 50741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 50841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 50941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 51041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 51141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 51241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 51341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TOARGB1555ROW_NEON 51441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 51541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGBTOARGB4444 \ 51641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d20, d20, #4 \n" /* B */ \ 51741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vbic.32 d21, d21, d4 \n" /* G */ \ 51841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d22, d22, #4 \n" /* R */ \ 51941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vbic.32 d23, d23, d4 \n" /* A */ \ 52041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr d0, d20, d21 \n" /* BG */ \ 52141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr d1, d22, d23 \n" /* RA */ \ 
52241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vzip.u8 d0, d1 \n" /* BGRA */ 52341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 52441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I422TOARGB4444ROW_NEON 52541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I422ToARGB4444Row_NEON(const uint8* src_y, 52641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_u, 52741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_v, 52841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb4444, 52941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 53041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 53141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 53241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%5] \n" 53341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(6) 53441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%6] \n" 53541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 53641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 53741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 53841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. 
53941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 54041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 54141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV422 54241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 54341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" 54441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 54541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGBTOARGB4444 54641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 54741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444. 54841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 54941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 55041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_u), // %1 55141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_v), // %2 55241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb4444), // %3 55341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 55441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %5 55541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %6 55641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 55741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 55841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 55941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 56041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I422TOARGB4444ROW_NEON 
56141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 56241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YTOARGBROW_NEON 56341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YToARGBRow_NEON(const uint8* src_y, 56441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 56541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 56641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 56741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 56841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%3] \n" 56941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(4) 57041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%4] \n" 57141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 57241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 57341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 57441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 57541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 57641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUV400 57741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 57841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" 57941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 58041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 58141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" 58241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 58341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 58441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 58541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 58641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %3 58741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %4 58841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 58941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 59041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 59141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 59241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_YTOARGBROW_NEON 59341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 59441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_I400TOARGBROW_NEON 59541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid I400ToARGBRow_NEON(const uint8* src_y, 59641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 59741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 59841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 59941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 60041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 60141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 60241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 60341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d20}, [%0]! 
\n" 60441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov d21, d20 \n" 60541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov d22, d20 \n" 60641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" 60741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 60841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 60941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 61041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 61141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 61241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 61341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 61441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "d20", "d21", "d22", "d23" 61541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 61641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 61741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_I400TOARGBROW_NEON 61841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 61941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_NV12TOARGBROW_NEON 62041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid NV12ToARGBRow_NEON(const uint8* src_y, 62141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_uv, 62241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 62341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 62441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 62541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(4) 
62641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%4] \n" 62741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 62841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%5] \n" 62941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 63041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 63141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 63241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 63341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 63441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READNV12 63541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 63641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" 63741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 63841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 63941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" 64041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 64141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 64241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_uv), // %1 64341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %2 64441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 64541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %4 64641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %5 64741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 64841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 64941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 65041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 65141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_NV12TOARGBROW_NEON 65241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 65341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_NV21TOARGBROW_NEON 65441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid NV21ToARGBRow_NEON(const uint8* src_y, 65541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_uv, 65641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 65741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 65841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 65941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(4) 66041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%4] \n" 66141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
66241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%5] \n" 66341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 66441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 66541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 66641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 66741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 66841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READNV21 66941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 67041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" 67141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 67241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 67341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" 67441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 67541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 67641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_uv), // %1 67741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %2 67841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 67941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %4 68041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %5 68141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 68241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 68341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 68441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 68541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_NV21TOARGBROW_NEON 68641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 68741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_NV12TORGB565ROW_NEON 68841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid NV12ToRGB565Row_NEON(const uint8* src_y, 68941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_uv, 69041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_rgb565, 69141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 69241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 69341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(4) 69441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%4] \n" 69541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
69641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%5] \n" 69741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 69841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 69941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 70041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 70141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 70241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READNV12 70341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 70441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" 70541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGBTORGB565 70641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 70741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 
70841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 70941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 71041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_uv), // %1 71141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_rgb565), // %2 71241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 71341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %4 71441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %5 71541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 71641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 71741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 71841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 71941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_NV12TORGB565ROW_NEON 72041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 72141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_NV21TORGB565ROW_NEON 72241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid NV21ToRGB565Row_NEON(const uint8* src_y, 72341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_uv, 72441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_rgb565, 72541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 72641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 72741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(4) 72841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%4] \n" 72941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(5) 
73041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%5] \n" 73141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 73241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 73341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 73441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 73541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 73641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READNV21 73741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 73841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" 73941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGBTORGB565 74041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 74141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 
74241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 74341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y), // %0 74441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_uv), // %1 74541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_rgb565), // %2 74641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 74741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %4 74841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %5 74941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 75041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 75141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 75241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 75341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_NV21TORGB565ROW_NEON 75441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 75541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOARGBROW_NEON 75641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToARGBRow_NEON(const uint8* src_yuy2, 75741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, 75841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int width) { 75941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 76041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 76141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d24}, [%3] \n" 76241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(4) 76341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {d25}, [%4] \n" 
76441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #128 \n" 76541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q14, #74 \n" 76641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #16 \n" 76741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 76841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 76941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org READYUY2 77041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org YUV422TORGB 77141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" 77241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d23, #255 \n" 77341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 77441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 77541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 77641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_yuy2), // %0 77741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 77841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 77941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(&kUVToRB), // %3 78041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(&kUVToG) // %4 78141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 78241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 78341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 78441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 78541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_YUY2TOARGBROW_NEON 
#ifdef HAS_UYVYTOARGBROW_NEON
// Converts one row of packed UYVY to ARGB, 8 pixels per loop pass.
// READUYVY and YUV422TORGB (macros defined elsewhere in this file) provide
// the load and colour-conversion instructions; the fourth output channel is
// filled with 255.
// NOTE(review): this body is still written in 32-bit ARM NEON syntax
// (vld1.8, d/q register names) -- presumably awaiting an AArch64 port;
// confirm HAS_UYVYTOARGBROW_NEON stays undefined for this target.
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
                        uint8* dst_argb,
                        int width) {
  asm volatile (
    MEMACCESS(3)
    "vld1.8 {d24}, [%3] \n"  // d24 <- 8 bytes read through &kUVToRB
    MEMACCESS(4)
    "vld1.8 {d25}, [%4] \n"  // d25 <- 8 bytes read through &kUVToG
    "vmov.u8 d26, #128 \n"
    "vmov.u16 q14, #74 \n"
    "vmov.u16 q15, #16 \n"
    ".p2align 2 \n"
  "1: \n"
    READUYVY
    YUV422TORGB
    "subs %2, %2, #8 \n"  // width -= 8, sets the flags tested by bgt
    "vmov.u8 d23, #255 \n"  // constant for the fourth stored channel
    MEMACCESS(1)
    "vst4.8 {d20, d21, d22, d23}, [%1]! \n"  // interleaved store, dst advances
    "bgt 1b \n"
  : "+r"(src_uyvy),  // %0
    "+r"(dst_argb),  // %1
    "+r"(width)      // %2
  : "r"(&kUVToRB),   // %3
    "r"(&kUVToG)     // %4
  : "cc", "memory", "q0", "q1", "q2", "q3",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_UYVYTOARGBROW_NEON

// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
#ifdef HAS_SPLITUVROW_NEON
// De-interleaves packed UV: each pass loads 32 bytes from src_uv and stores
// 16 bytes to dst_u and 16 bytes to dst_v. The loop body always runs at least
// once and repeats while the remaining width is greater than zero.
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) {
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "ld2 {v0.16b, v1.16b}, [%0], #32 \n"  // load 16 pairs of UV
    // width is a 32 bit int: use the w view of its register so that the
    // unspecified upper 32 bits cannot affect the flags tested by bgt.
    "subs %w3, %w3, #16 \n"  // 16 processed per loop
    MEMACCESS(1)
    "st1 {v0.16b}, [%1], #16 \n"  // store U
    MEMACCESS(2)
    "st1 {v1.16b}, [%2], #16 \n"  // store V
    "bgt 1b \n"
  : "+r"(src_uv),  // %0
    "+r"(dst_u),   // %1
    "+r"(dst_v),   // %2
    "+r"(width)    // %3  // Output registers
  :                // Input registers
  : "cc", "memory", "v0", "v1"  // Clobber List
  );
}
#endif  // HAS_SPLITUVROW_NEON

// Reads 16 U's and V's and writes out 16 pairs of UV.
#ifdef HAS_MERGEUVROW_NEON
// Interleaves two planes: each pass loads 16 bytes from src_u and 16 bytes
// from src_v and stores them as 32 bytes of alternating U,V to dst_uv.
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                     int width) {
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "ld1 {v0.16b}, [%0], #16 \n"  // load U
    MEMACCESS(1)
    "ld1 {v1.16b}, [%1], #16 \n"  // load V
    // 32 bit counter: w view avoids depending on the register's upper half.
    "subs %w3, %w3, #16 \n"  // 16 processed per loop
    MEMACCESS(2)
    "st2 {v0.16b, v1.16b}, [%2], #32 \n"  // store 16 pairs of UV
    "bgt 1b \n"
  :
    "+r"(src_u),   // %0
    "+r"(src_v),   // %1
    "+r"(dst_uv),  // %2
    "+r"(width)    // %3  // Output registers
  :                // Input registers
  : "cc", "memory", "v0", "v1"  // Clobber List
  );
}
#endif  // HAS_MERGEUVROW_NEON

// Copy multiple of 32 bytes. Each pass moves 32 bytes through four 8 byte
// vector registers with a single ld1 / st1 pair.
#ifdef HAS_COPYROW_NEON
void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "ld1 {v0.8b-v3.8b}, [%0], #32 \n"  // load 32
    // 32 bit counter: w view avoids depending on the register's upper half.
    "subs %w2, %w2, #32 \n"  // 32 processed per loop
    MEMACCESS(1)
    "st1 {v0.8b-v3.8b}, [%1], #32 \n"  // store 32
    "bgt 1b \n"
  : "+r"(src),   // %0
    "+r"(dst),   // %1
    "+r"(count)  // %2  // Output registers
  :              // Input registers
  : "cc", "memory", "v0", "v1", "v2", "v3"  // Clobber List
  );
}
#endif  // HAS_COPYROW_NEON

// SetRow_NEON writes 'count' bytes using a 32 bit value repeated.
// The value is replicated into all four 32 bit lanes of v0 and stored
// 16 bytes at a time, so every pass writes a full 16 byte group.
#ifdef HAS_SETROW_NEON
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
  asm volatile (
    "dup v0.4s, %w2 \n"  // duplicate 4 ints
  "1: \n"
    // 32 bit counter: w view avoids depending on the register's upper half.
    "subs %w1, %w1, #16 \n"  // 16 bytes per loop
    MEMACCESS(0)
    "st1 {v0.16b}, [%0], #16 \n"  // store
    "bgt 1b \n"
  : "+r"(dst),   // %0
    "+r"(count)  // %1
  : "r"(v32)     // %2
  : "cc", "memory", "v0"
  );
}
#endif  // HAS_SETROW_NEON

// TODO(fbarchard): Make fully assembler
// SetRow32 writes 'count' words using a 32 bit value repeated.
// Fills 'height' rows: each row gets width * 4 bytes via SetRow_NEON, and
// dst then advances by dst_stride bytes to the next row.
#ifdef HAS_ARGBSETROWS_NEON
void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
                      int dst_stride, int height) {
  for (int y = 0; y < height; ++y) {
    SetRow_NEON(dst, v32, width << 2);
    dst += dst_stride;
  }
}
#endif  // HAS_ARGBSETROWS_NEON

#ifdef HAS_MIRRORROW_NEON
// Writes the bytes of src to dst in reverse order, 16 bytes per pass.
// src is walked backwards from the last 16 byte group while dst advances.
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
  asm volatile (
    // Start at end of source row.
    // width is a 32 bit int; sign-extend it explicitly before the 64 bit
    // pointer add instead of trusting the register's upper half.
    "add %0, %0, %w2, sxtw \n"
    "sub %0, %0, #16 \n"

    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "ld1 {v0.16b}, [%0], %3 \n"  // src -= 16
    "subs %w2, %w2, #16 \n"  // 16 pixels per loop.
    // rev64 reverses the bytes inside each 64 bit half; storing the high
    // half before the low half completes the 16 byte reversal.
    "rev64 v0.16b, v0.16b \n"
    MEMACCESS(1)
    "st1 {v0.D}[1], [%1], #8 \n"  // dst += 16
    MEMACCESS(1)
    "st1 {v0.D}[0], [%1], #8 \n"
    "bgt 1b \n"
  : "+r"(src),   // %0
    "+r"(dst),   // %1
    "+r"(width)  // %2
  : "r"((ptrdiff_t)-16)  // %3
  : "cc", "memory", "v0"
  );
}
#endif  // HAS_MIRRORROW_NEON
#ifdef HAS_MIRRORUVROW_NEON
// Mirrors a row of packed UV while splitting it into planes: each pass reads
// 16 bytes (8 UV pairs) walking backwards through src_uv and writes 8
// reversed bytes to dst_u and 8 reversed bytes to dst_v.
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                      int width) {
  asm volatile (
    // Start at end of source row.
    // width is a 32 bit int; sign-extend it explicitly (then scale by 2
    // bytes per pair) rather than trusting the register's upper half.
    "add %0, %0, %w3, sxtw #1 \n"
    "sub %0, %0, #16 \n"

    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "ld2 {v0.8b, v1.8b}, [%0], %4 \n"  // src -= 16
    "subs %w3, %w3, #8 \n"  // 8 pixels per loop.
    "rev64 v0.8b, v0.8b \n"  // reverse the 8 U bytes
    "rev64 v1.8b, v1.8b \n"  // reverse the 8 V bytes
    MEMACCESS(1)
    "st1 {v0.8b}, [%1], #8 \n"  // dst += 8
    MEMACCESS(2)
    "st1 {v1.8b}, [%2], #8 \n"
    "bgt 1b \n"
  : "+r"(src_uv),  // %0
    "+r"(dst_u),   // %1
    "+r"(dst_v),   // %2
    "+r"(width)    // %3
  : "r"((ptrdiff_t)-16)  // %4
  : "cc", "memory", "v0", "v1"
  );
}
#endif  // HAS_MIRRORUVROW_NEON

#ifdef HAS_ARGBMIRRORROW_NEON
// Reverses the order of 4 byte pixels in a row, 4 pixels (16 bytes) per
// pass. src is walked backwards from the last 16 byte group; dst advances.
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
  asm volatile (
    // Start at end of source row.
    // width is a 32 bit int; sign-extend it explicitly (then scale by 4
    // bytes per pixel) rather than trusting the register's upper half.
    "add %0, %0, %w2, sxtw #2 \n"
    "sub %0, %0, #16 \n"

    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "ld1 {v0.16b}, [%0], %3 \n"  // src -= 16
    "subs %w2, %w2, #4 \n"  // 4 pixels per loop.
    // rev64 on .4s swaps the two 32 bit pixels inside each 64 bit half;
    // storing the high half first completes the 4 pixel reversal.
    "rev64 v0.4s, v0.4s \n"
    MEMACCESS(1)
    "st1 {v0.D}[1], [%1], #8 \n"  // dst += 16
    MEMACCESS(1)
    "st1 {v0.D}[0], [%1], #8 \n"
    "bgt 1b \n"
  : "+r"(src),   // %0
    "+r"(dst),   // %1
    "+r"(width)  // %2
  : "r"((ptrdiff_t)-16)  // %3
  : "cc", "memory", "v0"
  );
}
#endif  // HAS_ARGBMIRRORROW_NEON
100441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 100541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB24TOARGBROW_NEON 100641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) { 100741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 1008d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "movi v4.8b, #255 \n" // Alpha 100941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 101041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 101141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1012d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld3 {v1.8b-v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24. 101341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 101441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1015d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st4 {v1.8b-v4.8b}, [%1], #32 \n" // store 8 pixels of ARGB. 
101641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 101741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgb24), // %0 101841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 101941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 102041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1021d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List 102241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 102341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 102441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGB24TOARGBROW_NEON 102541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 102641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RAWTOARGBROW_NEON 102741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { 102841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 1029d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "movi v5.8b, #255 \n" // Alpha 103041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 103141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 103241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1033d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld3 {v0.8b-v2.8b}, [%0], #24 \n" // read r g b 103441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 
1035d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "mov v3.8b, v1.8b \n" // move g 1036d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "mov v4.8b, v0.8b \n" // move r 103741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1038d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st4 {v2.8b-v5.8b}, [%1], #32 \n" // store b g r a 103941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 104041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_raw), // %0 104141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 104241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 104341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1044d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List 104541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 104641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 104741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RAWTOARGBROW_NEON 104841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 104941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define RGB565TOARGB \ 105041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ 105141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ 105241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ 105341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ 105441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ 
105541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ 105641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 d0, d0, d4 \n" /* B */ \ 105741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ 105841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 d2, d1, d5 \n" /* R */ \ 105941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 d1, d4, d6 \n" /* G */ 106041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 106141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB565TOARGBROW_NEON 106241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { 106341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 106441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, #255 \n" // Alpha 106541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 106641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 106741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 106841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 106941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 107041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGB565TOARGB 107141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 107241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
107341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 107441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgb565), // %0 107541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 107641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 107741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 107841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 107941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 108041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 108141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGB565TOARGBROW_NEON 108241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 108341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGB1555TOARGB \ 108441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ 108541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ 108641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \ 108741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ 108841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ 108941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ 109041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ 109141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ 109241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 q0, q2, #3 \n" /* B,G 
BBBBB000 upper 5 */ \ 109341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ 109441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 q1, q1, q3 \n" /* R,A */ \ 109541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 q0, q0, q2 \n" /* B,G */ \ 109641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 109741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 109841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define RGB555TOARGB \ 109941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ 110041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ 110141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ 110241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ 110341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ 110441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ 110541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 d0, d0, d4 \n" /* B */ \ 110641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ 110741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 d2, d1, d5 \n" /* R */ \ 110841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 d1, d4, d6 \n" /* G */ 110941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 111041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB1555TOARGBROW_NEON 
111141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, 111241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int pix) { 111341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 111441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, #255 \n" // Alpha 111541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 111641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 111741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 111841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. 111941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 112041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB1555TOARGB 112141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 112241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
112341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 112441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb1555), // %0 112541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 112641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 112741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 112841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 112941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 113041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 113141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGB1555TOARGBROW_NEON 113241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 113341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#define ARGB4444TOARGB \ 113441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ 113541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ 113641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ 113741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ 113841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ 113941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ 114041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ 114141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ 114241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 
114341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB4444TOARGBROW_NEON 114441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, 114541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int pix) { 114641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 114741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, #255 \n" // Alpha 114841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 114941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 115041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 115141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 115241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 115341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB4444TOARGB 115441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 115541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
115641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 115741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb4444), // %0 115841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 115941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 116041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 116141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2" // Clobber List 116241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 116341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 116441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGB4444TOARGBROW_NEON 116541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 116641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTORGB24ROW_NEON 116741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) { 116841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 116941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 117041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 117141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1172d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load 8 pixels of ARGB. 117341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 117441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1175d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st3 {v1.8b-v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24. 
117641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 117741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 117841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_rgb24), // %1 117941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 118041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1181d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List 118241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 118341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 118441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBTORGB24ROW_NEON 118541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 118641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTORAWROW_NEON 118741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { 118841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 118941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 119041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 119141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1192d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v1.8b-v4.8b}, [%0], #32 \n" // load b g r a 119341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 
1194d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "mov v4.8b, v2.8b \n" // mov g 1195d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "mov v5.8b, v1.8b \n" // mov b 119641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1197d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st3 {v3.8b-v5.8b}, [%1], #24 \n" // store r g b 119841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 119941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 120041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_raw), // %1 120141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 120241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1203d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List 120441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 120541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 120641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBTORAWROW_NEON 120741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 120841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOYROW_NEON 120941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { 121041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 121141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 121241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 121341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1214d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2. 
121541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #16 \n" // 16 processed per loop. 121641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1217d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y. 121841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 121941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_yuy2), // %0 122041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 122141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 122241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1223d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1" // Clobber List 122441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 122541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 122641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_YUY2TOYROW_NEON 122741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 122841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_UYVYTOYROW_NEON 122941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { 123041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 123141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 123241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 123341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1234d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld2 {v0.16b, v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY. 123541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #16 \n" // 16 processed per loop. 
123641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1237d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y. 123841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 123941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_uyvy), // %0 124041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 124141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 124241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1243d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1" // Clobber List 124441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 124541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 124641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_UYVYTOYROW_NEON 124741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 124841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOUV422ROW_NEON 124941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, 125041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int pix) { 125141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 125241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 125341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 125441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1255d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2. 125641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 
125741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1258d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v1.8b}, [%1], #8 \n" // store 8 U. 125941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 1260d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v3.8b}, [%2], #8 \n" // store 8 V. 126141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 126241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_yuy2), // %0 126341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %1 126441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %2 126541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %3 126641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1267d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 126841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 126941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 127041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_YUY2TOUV422ROW_NEON 127141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 127241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_UYVYTOUV422ROW_NEON 127341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, 127441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int pix) { 127541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 127641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 127741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 127841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 
MEMACCESS(0) 1279d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY. 128041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 128141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1282d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v0.8b}, [%1], #8 \n" // store 8 U. 128341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 1284d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v2.8b}, [%2], #8 \n" // store 8 V. 128541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 128641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_uyvy), // %0 128741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %1 128841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %2 128941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %3 129041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1291d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 129241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 129341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 129441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_UYVYTOUV422ROW_NEON 129541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 129641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_YUY2TOUVROW_NEON 129741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, 129841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 
129941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 1300d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add %x1, %x0, %w1, sxtw \n" // stride + src_yuy2 130141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 130241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 130341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1304d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of YUY2. 130541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 130641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1307d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row YUY2. 1308d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U 1309d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V 131041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 1311d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v1.8b}, [%2], #8 \n" // store 8 U. 131241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 1313d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v3.8b}, [%3], #8 \n" // store 8 V. 
131441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 131541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_yuy2), // %0 131641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(stride_yuy2), // %1 131741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 131841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 131941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 132041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1321d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List 132241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 132341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 132441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_YUY2TOUVROW_NEON 132541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 132641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_UYVYTOUVROW_NEON 132741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, 132841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 132941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 1330d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add %x1, %x0, %w1, sxtw \n" // stride + src_uyvy 133141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 133241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 133341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1334d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 16 pixels of UYVY. 
133541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 133641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1337d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load next row UYVY. 1338d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U 1339d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V 134041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 1341d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v0.8b}, [%2], #8 \n" // store 8 U. 134241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 1343d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v2.8b}, [%3], #8 \n" // store 8 V. 134441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 134541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_uyvy), // %0 134641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(stride_uyvy), // %1 134741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 134841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 134941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 135041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1351d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List 135241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 135341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 135441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_UYVYTOUVROW_NEON 
135541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 135641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_HALFROW_NEON 135741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid HalfRow_NEON(const uint8* src_uv, int src_uv_stride, 135841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_uv, int pix) { 135941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 136041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // change the stride to row 2 pointer 1361d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add %x1, %x0, %w1, sxtw \n" 136241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 136341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1364d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v0.16b}, [%0], #16 \n" // load row 1 16 pixels. 136541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" // 16 processed per loop 136641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1367d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v1.16b}, [%1], #16 \n" // load row 2 16 pixels. 
1368d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "urhadd v0.16b, v0.16b, v1.16b \n" // average row 1 and 2 136941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 1370d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v0.16b}, [%2], #16 \n" 137141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 137241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_uv), // %0 137341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_uv_stride), // %1 137441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_uv), // %2 137541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %3 137641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1377d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1" // Clobber List 137841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 137941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 138041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_HALFROW_NEON 138141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 138241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Select 2 channels from ARGB on alternating pixels. e.g. 
BGBGBGBG 138341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOBAYERROW_NEON 138441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, 138541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint32 selector, int pix) { 138641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 1387d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "mov v2.s[0], %w3 \n" // selector 138841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 138941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1390d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v0.16b, v1.16b}, [%0], 32 \n" // load row 8 pixels. 139141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop 1392d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "tbl v4.8b, {v0.16b}, v2.8b \n" // look up 4 pixels 1393d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "tbl v5.8b, {v1.16b}, v2.8b \n" // look up 4 pixels 1394d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "trn1 v4.4s, v4.4s, v5.4s \n" // combine 8 pixels 139541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1396d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v4.8b}, [%1], #8 \n" // store 8. 
139741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 139841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 139941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_bayer), // %1 140041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 140141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(selector) // %3 1402d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v4", "v5" // Clobber List 140341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 140441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 140541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBTOBAYERROW_NEON 140641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 140741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Select G channels from ARGB. e.g. GGGGGGGG 140841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOBAYERGGROW_NEON 140941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, 141041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint32 /*selector*/, int pix) { 141141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 141241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 141341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 1414d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load row 8 pixels. 
141541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop 141641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 1417d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v1.8b}, [%1], #8 \n" // store 8 G's. 141841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 141941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 142041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_bayer), // %1 142141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 142241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 1423d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 142441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 142541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 142641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBTOBAYERGGROW_NEON 142741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 142841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
#ifdef HAS_ARGBSHUFFLEROW_NEON
// Permutes the bytes of ARGB pixels, 4 pixels (16 bytes) per iteration.
//   src_argb: source pixels, advanced 16 bytes per pass.
//   dst_argb: destination pixels, advanced 16 bytes per pass.
//   shuffler: 16 byte-index table consumed by TBL; loaded once before the loop.
//   pix:      pixel count. The body runs at least once and steps by 4, so a
//             positive multiple of 4 is presumably expected - confirm at caller.
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
                         const uint8* shuffler, int pix) {
  asm volatile (
    MEMACCESS(3)
    "ld1        {v2.16b}, [%3]                 \n"  // shuffler
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], #16            \n"  // load 4 pixels.
    // pix is a 32-bit int: use the W view so stale upper bits of the X
    // register cannot influence the flags tested by bgt.
    "subs       %w2, %w2, #4                   \n"  // 4 processed per loop
    "tbl        v1.16b, {v0.16b}, v2.16b       \n"  // look up 4 pixels
    MEMACCESS(1)
    "st1        {v1.16b}, [%1], #16            \n"  // store 4.
    "bgt        1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_argb),  // %1
    "+r"(pix)        // %2
  : "r"(shuffler)    // %3
  : "cc", "memory", "v0", "v1", "v2"  // Clobber List
  );
}
#endif  // HAS_ARGBSHUFFLEROW_NEON

#ifdef HAS_I422TOYUY2ROW_NEON
// Packs planar Y, U and V rows into YUY2 (Y0 U Y1 V), 16 pixels per iteration.
//   src_y:    16 Y bytes read per pass.
//   src_u/v:  8 U and 8 V bytes read per pass.
//   dst_yuy2: 32 bytes written per pass.
//   width:    pixel count; the loop steps by 16 and runs at least once.
void I422ToYUY2Row_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_yuy2, int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // ld2 de-interleaves: v0 = even Ys, v1 = odd Ys.
    "ld2        {v0.8b, v1.8b}, [%0], #16      \n"  // load 16 Ys
    "mov        v2.8b, v1.8b                   \n"  // odd Ys move to lane 2
    MEMACCESS(1)
    "ld1        {v1.8b}, [%1], #8              \n"  // load 8 Us
    MEMACCESS(2)
    "ld1        {v3.8b}, [%2], #8              \n"  // load 8 Vs
    // width is a 32-bit int: decrement through the W register view.
    "subs       %w4, %w4, #16                  \n"  // 16 pixels
    MEMACCESS(3)
    "st4        {v0.8b-v3.8b}, [%3], #32       \n"  // Store 8 YUY2/16 pixels.
    "bgt        1b                             \n"
  : "+r"(src_y),     // %0
    "+r"(src_u),     // %1
    "+r"(src_v),     // %2
    "+r"(dst_yuy2),  // %3
    "+r"(width)      // %4
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_I422TOYUY2ROW_NEON

#ifdef HAS_I422TOUYVYROW_NEON
// Packs planar Y, U and V rows into UYVY (U Y0 V Y1), 16 pixels per iteration.
//   src_y:    16 Y bytes read per pass.
//   src_u/v:  8 U and 8 V bytes read per pass.
//   dst_uyvy: 32 bytes written per pass.
//   width:    pixel count; the loop steps by 16 and runs at least once.
void I422ToUYVYRow_NEON(const uint8* src_y,
                        const uint8* src_u,
                        const uint8* src_v,
                        uint8* dst_uyvy, int width) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    // ld2 de-interleaves: v1 = even Ys, v2 = odd Ys.
    "ld2        {v1.8b, v2.8b}, [%0], #16      \n"  // load 16 Ys
    "mov        v3.8b, v2.8b                   \n"  // odd Ys move to lane 3
    MEMACCESS(1)
    "ld1        {v0.8b}, [%1], #8              \n"  // load 8 Us
    MEMACCESS(2)
    "ld1        {v2.8b}, [%2], #8              \n"  // load 8 Vs
    // width is a 32-bit int: decrement through the W register view.
    "subs       %w4, %w4, #16                  \n"  // 16 pixels
    MEMACCESS(3)
    "st4        {v0.8b-v3.8b}, [%3], #32       \n"  // Store 8 UYVY/16 pixels.
    "bgt        1b                             \n"
  : "+r"(src_y),     // %0
    "+r"(src_u),     // %1
    "+r"(src_v),     // %2
    "+r"(dst_uyvy),  // %3
    "+r"(width)      // %4
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_I422TOUYVYROW_NEON

#ifdef HAS_ARGBTORGB565ROW_NEON
// Converts 8 ARGB pixels per iteration to RGB565 via the ARGBTORGB565 macro.
// NOTE(review): this body still uses ARMv7 NEON syntax (vld4.8 / d and q
// registers) although the file is built only for __aarch64__. It relies on
// the ARGBTORGB565 macro defined elsewhere in this file, so it is left
// unchanged here - port the macro and this loop together.
void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    ARGBTORGB565
    MEMACCESS(1)
    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels RGB565.
    "bgt        1b                             \n"
  : "+r"(src_argb),    // %0
    "+r"(dst_rgb565),  // %1
    "+r"(pix)          // %2
  :
  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
  );
}
#endif  // HAS_ARGBTORGB565ROW_NEON

#ifdef HAS_ARGBTOARGB1555ROW_NEON
// Converts 8 ARGB pixels per iteration to ARGB1555 via the ARGBTOARGB1555
// macro.
// NOTE(review): ARMv7 NEON syntax inside an __aarch64__-only file; depends on
// the ARGBTOARGB1555 macro defined elsewhere, so it is left unchanged here.
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
                            int pix) {
  asm volatile (
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    ARGBTOARGB1555
    MEMACCESS(1)
    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB1555.
    "bgt        1b                             \n"
  : "+r"(src_argb),      // %0
    "+r"(dst_argb1555),  // %1
    "+r"(pix)            // %2
  :
  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
  );
}
#endif  // HAS_ARGBTOARGB1555ROW_NEON

#ifdef HAS_ARGBTOARGB4444ROW_NEON
// Converts 8 ARGB pixels per iteration to ARGB4444 via the ARGBTOARGB4444
// macro.
// NOTE(review): ARMv7 NEON syntax inside an __aarch64__-only file; depends on
// the ARGBTOARGB4444 macro defined elsewhere, so it is left unchanged here.
// NOTE(review): d4 is written before the loop but has no matching entry in the
// clobber list below - confirm when porting.
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
                            int pix) {
  asm volatile (
    "vmov.u8    d4, #0x0f                      \n"  // bits to clear with vbic.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    ARGBTOARGB4444
    MEMACCESS(1)
    "vst1.8     {q0}, [%1]!                    \n"  // store 8 pixels ARGB4444.
    "bgt        1b                             \n"
  : "+r"(src_argb),      // %0
    "+r"(dst_argb4444),  // %1
    "+r"(pix)            // %2
  :
  : "cc", "memory", "q0", "q8", "q9", "q10", "q11"
  );
}
#endif  // HAS_ARGBTOARGB4444ROW_NEON

#ifdef HAS_ARGBTOYROW_NEON
// Computes 8 luma bytes per iteration from 8 ARGB pixels:
//   Y = sat8(((13 * B + 65 * G + 33 * R) rounding>> 7) + 16)
// The alpha lane loaded into v3 is discarded (v3 is reused as accumulator).
//   src_argb: 32 bytes read per pass.
//   dst_y:    8 bytes written per pass.
//   pix:      pixel count; the loop steps by 8 and runs at least once.
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
  asm volatile (
    "movi       v4.8b, #13                     \n"  // B * 0.1016 coefficient
    "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
    "movi       v6.8b, #33                     \n"  // R * 0.2578 coefficient
    "movi       v7.8b, #16                     \n"  // Add 16 constant
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
    // pix is a 32-bit int: decrement through the W register view.
    "subs       %w2, %w2, #8                   \n"  // 8 processed per loop.
    "umull      v3.8h, v0.8b, v4.8b            \n"  // B
    "umlal      v3.8h, v1.8b, v5.8b            \n"  // G
    "umlal      v3.8h, v2.8b, v6.8b            \n"  // R
    "sqrshrun   v0.8b, v3.8h, #7               \n"  // 16 bit to 8 bit Y
    "uqadd      v0.8b, v0.8b, v7.8b            \n"  // saturating +16 offset
    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
    "bgt        1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_y),     // %1
    "+r"(pix)        // %2
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"
  );
}
#endif  // HAS_ARGBTOYROW_NEON

#ifdef HAS_ARGBTOYJROW_NEON
// Computes 8 luma bytes per iteration from 8 ARGB pixels, without the +16
// offset applied by ARGBToYRow_NEON:
//   Y = sat8((15 * B + 75 * G + 38 * R) rounding>> 7)
//   src_argb: 32 bytes read per pass.
//   dst_y:    8 bytes written per pass.
//   pix:      pixel count; the loop steps by 8 and runs at least once.
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) {
  asm volatile (
    "movi       v4.8b, #15                     \n"  // B * 0.11400 coefficient
    "movi       v5.8b, #75                     \n"  // G * 0.58700 coefficient
    "movi       v6.8b, #38                     \n"  // R * 0.29900 coefficient
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld4        {v0.8b-v3.8b}, [%0], #32       \n"  // load 8 ARGB pixels.
    // pix is a 32-bit int: decrement through the W register view.
    "subs       %w2, %w2, #8                   \n"  // 8 processed per loop.
    "umull      v3.8h, v0.8b, v4.8b            \n"  // B
    "umlal      v3.8h, v1.8b, v5.8b            \n"  // G
    "umlal      v3.8h, v2.8b, v6.8b            \n"  // R
    "sqrshrun   v0.8b, v3.8h, #7               \n"  // 15 bit to 8 bit Y
    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels Y.
    "bgt        1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_y),     // %1
    "+r"(pix)        // %2
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"
  );
}
#endif  // HAS_ARGBTOYJROW_NEON

// 8x1 pixels.
#ifdef HAS_ARGBTOUV444ROW_NEON
// Computes one U and one V byte per ARGB pixel, 8 pixels per iteration.
// Arithmetic is done in 16-bit lanes:
//   U = sat8((112 * B - 74 * G - 38 * R + 0x8080) >> 8)
//   V = sat8((112 * R - 94 * G - 18 * B + 0x8080) >> 8)
//   src_argb: 32 bytes read per pass.
//   dst_u/v:  8 bytes written to each per pass.
//   pix:      pixel count; the loop steps by 8 and runs at least once.
// Written with AArch64 mnemonics and v-registers, since this file is built
// only for __aarch64__ where ARMv7 vld4.8/d/q syntax does not assemble.
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                         int pix) {
  asm volatile (
    "movi       v24.8b, #112                   \n"  // UB / VR 0.875 coefficient
    "movi       v25.8b, #74                    \n"  // UG -0.5781 coefficient
    "movi       v26.8b, #38                    \n"  // UR -0.2969 coefficient
    "movi       v27.8b, #18                    \n"  // VB -0.1406 coefficient
    "movi       v28.8b, #94                    \n"  // VG -0.7344 coefficient
    "movi       v29.16b, #0x80                 \n"  // 128.5 (0x8080 per short)
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld4        {v0.8b, v1.8b, v2.8b, v3.8b}, [%0], #32 \n"  // load 8 ARGB.
    // pix is a 32-bit int: decrement through the W register view.
    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
    "umull      v4.8h, v0.8b, v24.8b           \n"  // B
    "umlsl      v4.8h, v1.8b, v25.8b           \n"  // G
    "umlsl      v4.8h, v2.8b, v26.8b           \n"  // R
    "add        v4.8h, v4.8h, v29.8h           \n"  // +128 -> unsigned

    // Alpha (v3) is not needed, so its register is reused for V.
    "umull      v3.8h, v2.8b, v24.8b           \n"  // R
    "umlsl      v3.8h, v1.8b, v28.8b           \n"  // G
    "umlsl      v3.8h, v0.8b, v27.8b           \n"  // B
    "add        v3.8h, v3.8h, v29.8h           \n"  // +128 -> unsigned

    "uqshrn     v0.8b, v4.8h, #8               \n"  // 16 bit to 8 bit U
    "uqshrn     v1.8b, v3.8h, #8               \n"  // 16 bit to 8 bit V

    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels U.
    MEMACCESS(2)
    "st1        {v1.8b}, [%2], #8              \n"  // store 8 pixels V.
    "bgt        1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_u),     // %1
    "+r"(dst_v),     // %2
    "+r"(pix)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
    "v24", "v25", "v26", "v27", "v28", "v29"
  );
}
#endif  // HAS_ARGBTOUV444ROW_NEON

// 16x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
#ifdef HAS_ARGBTOUV422ROW_NEON
// Computes 8 U and 8 V bytes from 16 ARGB pixels per iteration. Each pair of
// horizontally adjacent pixels is summed per channel (uaddlp), and the
// coefficients are halved to compensate for the doubled magnitude:
//   U = sat8((56 * Bsum - 37 * Gsum - 19 * Rsum + 0x8080) >> 8)
//   V = sat8((56 * Rsum - 47 * Gsum -  9 * Bsum + 0x8080) >> 8)
//   src_argb: 64 bytes read per pass.
//   dst_u/v:  8 bytes written to each per pass.
//   pix:      ARGB pixel count; the loop steps by 16 and runs at least once.
// Written with AArch64 mnemonics and v-registers, since this file is built
// only for __aarch64__ where ARMv7 vld4.8/d/q syntax does not assemble.
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                         int pix) {
  asm volatile (
    "movi       v20.8h, #56                    \n"  // UB / VR 0.875 coeff / 2
    "movi       v21.8h, #37                    \n"  // UG -0.5781 coeff / 2
    "movi       v22.8h, #19                    \n"  // UR -0.2969 coeff / 2
    "movi       v23.8h, #9                     \n"  // VB -0.1406 coeff / 2
    "movi       v24.8h, #47                    \n"  // VG -0.7344 coeff / 2
    "movi       v25.16b, #0x80                 \n"  // 128.5 (0x8080 per short)
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld4        {v0.16b, v1.16b, v2.16b, v3.16b}, [%0], #64 \n"  // load 16.

    "uaddlp     v0.8h, v0.16b                  \n"  // B 16 bytes -> 8 shorts.
    "uaddlp     v1.8h, v1.16b                  \n"  // G 16 bytes -> 8 shorts.
    "uaddlp     v2.8h, v2.16b                  \n"  // R 16 bytes -> 8 shorts.

    // pix is a 32-bit int: decrement through the W register view.
    "subs       %w3, %w3, #16                  \n"  // 16 processed per loop.
    // Alpha (v3) is not needed, so its register is reused for U.
    "mul        v3.8h, v0.8h, v20.8h           \n"  // B
    "mls        v3.8h, v1.8h, v21.8h           \n"  // G
    "mls        v3.8h, v2.8h, v22.8h           \n"  // R
    "add        v3.8h, v3.8h, v25.8h           \n"  // +128 -> unsigned

    "mul        v4.8h, v2.8h, v20.8h           \n"  // R
    "mls        v4.8h, v1.8h, v24.8h           \n"  // G
    "mls        v4.8h, v0.8h, v23.8h           \n"  // B
    "add        v4.8h, v4.8h, v25.8h           \n"  // +128 -> unsigned

    "uqshrn     v0.8b, v3.8h, #8               \n"  // 16 bit to 8 bit U
    "uqshrn     v1.8b, v4.8h, #8               \n"  // 16 bit to 8 bit V

    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels U.
    MEMACCESS(2)
    "st1        {v1.8b}, [%2], #8              \n"  // store 8 pixels V.
    "bgt        1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_u),     // %1
    "+r"(dst_v),     // %2
    "+r"(pix)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
    "v20", "v21", "v22", "v23", "v24", "v25"
  );
}
#endif  // HAS_ARGBTOUV422ROW_NEON

// 32x1 pixels -> 8x1.  pix is number of argb pixels. e.g. 32.
#ifdef HAS_ARGBTOUV411ROW_NEON
// Computes 8 U and 8 V bytes from 32 ARGB pixels per iteration. Each group of
// 4 horizontally adjacent pixels is summed per channel (uaddlp then addp),
// halved with rounding (urshr #1), and then fed through the same
// half-coefficient formula used for pairs of pixels:
//   U = sat8((56 * Bavg2 - 37 * Gavg2 - 19 * Ravg2 + 0x8080) >> 8)
//   V = sat8((56 * Ravg2 - 47 * Gavg2 -  9 * Bavg2 + 0x8080) >> 8)
// where Xavg2 = (sum of 4 pixels + 1) >> 1.
//   src_argb: 128 bytes read per pass.
//   dst_u/v:  8 bytes written to each per pass.
//   pix:      ARGB pixel count; the loop steps by 32 and runs at least once.
// Written with AArch64 mnemonics and v-registers, since this file is built
// only for __aarch64__ where ARMv7 vld4.8/d/q syntax does not assemble.
void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
                         int pix) {
  asm volatile (
    "movi       v20.8h, #56                    \n"  // UB / VR 0.875 coeff / 2
    "movi       v21.8h, #37                    \n"  // UG -0.5781 coeff / 2
    "movi       v22.8h, #19                    \n"  // UR -0.2969 coeff / 2
    "movi       v23.8h, #9                     \n"  // VB -0.1406 coeff / 2
    "movi       v24.8h, #47                    \n"  // VG -0.7344 coeff / 2
    "movi       v25.16b, #0x80                 \n"  // 128.5 (0x8080 per short)
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld4        {v0.16b, v1.16b, v2.16b, v3.16b}, [%0], #64 \n"  // load 16.
    "uaddlp     v0.8h, v0.16b                  \n"  // B 16 bytes -> 8 shorts.
    "uaddlp     v1.8h, v1.16b                  \n"  // G 16 bytes -> 8 shorts.
    "uaddlp     v2.8h, v2.16b                  \n"  // R 16 bytes -> 8 shorts.
    MEMACCESS(0)
    "ld4        {v4.16b, v5.16b, v6.16b, v7.16b}, [%0], #64 \n"  // next 16.
    "uaddlp     v4.8h, v4.16b                  \n"  // B 16 bytes -> 8 shorts.
    "uaddlp     v5.8h, v5.16b                  \n"  // G 16 bytes -> 8 shorts.
    "uaddlp     v6.8h, v6.16b                  \n"  // R 16 bytes -> 8 shorts.

    "addp       v0.8h, v0.8h, v4.8h            \n"  // B 16 shorts -> 8 shorts.
    "addp       v1.8h, v1.8h, v5.8h            \n"  // G 16 shorts -> 8 shorts.
    "addp       v2.8h, v2.8h, v6.8h            \n"  // R 16 shorts -> 8 shorts.

    "urshr      v0.8h, v0.8h, #1               \n"  // 2x average
    "urshr      v1.8h, v1.8h, #1               \n"
    "urshr      v2.8h, v2.8h, #1               \n"

    // pix is a 32-bit int: decrement through the W register view.
    "subs       %w3, %w3, #32                  \n"  // 32 processed per loop.
    // v3 (alpha) and v4 (already folded into v0) are free for reuse here.
    "mul        v3.8h, v0.8h, v20.8h           \n"  // B
    "mls        v3.8h, v1.8h, v21.8h           \n"  // G
    "mls        v3.8h, v2.8h, v22.8h           \n"  // R
    "add        v3.8h, v3.8h, v25.8h           \n"  // +128 -> unsigned
    "mul        v4.8h, v2.8h, v20.8h           \n"  // R
    "mls        v4.8h, v1.8h, v24.8h           \n"  // G
    "mls        v4.8h, v0.8h, v23.8h           \n"  // B
    "add        v4.8h, v4.8h, v25.8h           \n"  // +128 -> unsigned
    "uqshrn     v0.8b, v3.8h, #8               \n"  // 16 bit to 8 bit U
    "uqshrn     v1.8b, v4.8h, #8               \n"  // 16 bit to 8 bit V
    MEMACCESS(1)
    "st1        {v0.8b}, [%1], #8              \n"  // store 8 pixels U.
    MEMACCESS(2)
    "st1        {v1.8b}, [%2], #8              \n"  // store 8 pixels V.
    "bgt        1b                             \n"
  : "+r"(src_argb),  // %0
    "+r"(dst_u),     // %1
    "+r"(dst_v),     // %2
    "+r"(pix)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v20", "v21", "v22", "v23", "v24", "v25"
  );
}
#endif  // HAS_ARGBTOUV411ROW_NEON

// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
// Expands to the asm text that turns three vectors of 16-bit channel values
// into 8 U bytes (d0) and 8 V bytes (d1).
//
// QB, QG and QR name the q registers holding the blue, green and red values.
// The expansion reads the multiplier constants from q10-q14 and the bias from
// q15, uses q8 and q9 as scratch, and overwrites d0/d1 (i.e. q0) with the
// narrowed results, so the caller must have loaded q10-q15 beforehand.
#define RGBTOUV(QB, QG, QR) \
    "vmul.s16 q8, " #QB ", q10 \n"  /* B */ \
    "vmls.s16 q8, " #QG ", q11 \n"  /* G */ \
    "vmls.s16 q8, " #QR ", q12 \n"  /* R */ \
    "vadd.u16 q8, q8, q15 \n"  /* +128 -> unsigned */ \
    "vmul.s16 q9, " #QR ", q10 \n"  /* R */ \
    "vmls.s16 q9, " #QG ", q14 \n"  /* G */ \
    "vmls.s16 q9, " #QB ", q13 \n"  /* B */ \
    "vadd.u16 q9, q9, q15 \n"  /* +128 -> unsigned */ \
    "vqshrn.u16 d0, q8, #8 \n"  /* 16 bit to 8 bit U */ \
    "vqshrn.u16 d1, q9, #8 \n"  /* 16 bit to 8 bit V */

// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
#ifdef HAS_ARGBTOUVROW_NEON
// Converts two adjacent rows of ARGB pixels to one row of U and one row of V,
// subsampling 2x2.
//
// src_argb points at the first row; the second row starts src_stride_argb
// bytes after it. Each loop pass reads 16 pixels from each row and writes
// 8 bytes to dst_u and 8 bytes to dst_v. The loop body always runs at least
// once and repeats while pix is still positive after subtracting 16.
//
// Each 2x2 group is summed per channel (vpaddl on the first row, vpadal to
// accumulate the second) and rounded-shifted right by one, leaving twice the
// average; the multiplier constants are loaded pre-halved to compensate.
//
// NOTE(review): the mnemonics are 32-bit ARM NEON syntax although the
// enclosing file is conditioned on __aarch64__; presumably
// HAS_ARGBTOUVROW_NEON stays undefined until this is ported - confirm.
void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int pix) {
  // Form the second-row pointer in C so the asm operand has full pointer
  // width. Adding the base address into the int stride operand inside the
  // asm would leave a pointer in a 32-bit variable on LP64 targets.
  const uint8* src_argb_1 = src_argb + src_stride_argb;
  asm volatile (
    "vmov.s16 q10, #112 / 2 \n"  // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n"  // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n"  // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n"  // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n"  // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n"  // 128.5
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 ARGB pixels.
    MEMACCESS(0)
    "vld4.8 {d1, d3, d5, d7}, [%0]! \n"  // load next 8 ARGB pixels.
    "vpaddl.u8 q0, q0 \n"  // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q1 \n"  // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q2, q2 \n"  // R 16 bytes -> 8 shorts.
    MEMACCESS(1)
    "vld4.8 {d8, d10, d12, d14}, [%1]! \n"  // load 8 more ARGB pixels.
    MEMACCESS(1)
    "vld4.8 {d9, d11, d13, d15}, [%1]! \n"  // load last 8 ARGB pixels.
    "vpadal.u8 q0, q4 \n"  // B 16 bytes -> 8 shorts.
    "vpadal.u8 q1, q5 \n"  // G 16 bytes -> 8 shorts.
    "vpadal.u8 q2, q6 \n"  // R 16 bytes -> 8 shorts.

    "vrshr.u16 q0, q0, #1 \n"  // 2x average
    "vrshr.u16 q1, q1, #1 \n"
    "vrshr.u16 q2, q2, #1 \n"

    "subs %4, %4, #16 \n"  // 16 processed per loop.
    RGBTOUV(q0, q1, q2)
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n"  // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n"  // store 8 pixels V.
    "bgt 1b \n"  // flags come from the subs above.
  : "+r"(src_argb),    // %0
    "+r"(src_argb_1),  // %1
    "+r"(dst_u),       // %2
    "+r"(dst_v),       // %3
    "+r"(pix)          // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_ARGBTOUVROW_NEON

// TODO(fbarchard): Subsample match C code.
186241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBTOUVJROW_NEON 186341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, 186441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 186541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 186641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_argb 186741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient 186841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient 186941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient 187041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient 187141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient 187241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 187341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 187441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 187541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 187641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 187741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 187841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 187941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 
188041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 188141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 188241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 188341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. 188441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 188541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. 188641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 188741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 188841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 188941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 189041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q0, q0, #1 \n" // 2x average 189141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q1, q1, #1 \n" 189241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q2, q2, #1 \n" 189341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 189441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 32 processed per loop. 189541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGBTOUV(q0, q1, q2) 189641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 189741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 
189841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 189941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 190041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 190141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 190241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_argb), // %1 190341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 190441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 190541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 190641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 190741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 190841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 190941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 191041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 191141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBTOUVJROW_NEON 191241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 191341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_BGRATOUVROW_NEON 191441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, 191541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 191641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 191741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_bgra 191841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 
"vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 191941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 192041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 192141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 192241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 192341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 192441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 192541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 192641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 192741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. 192841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 192941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. 193041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. 193141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. 193241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. 193341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 193441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. 
193541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 193641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. 193741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. 193841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. 193941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts. 194041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 194141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q1, q1, #1 \n" // 2x average 194241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q2, q2, #1 \n" 194341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q3, q3, #1 \n" 194441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 194541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 32 processed per loop. 194641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGBTOUV(q3, q2, q1) 194741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 194841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 194941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 195041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
195141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 195241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_bgra), // %0 195341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_bgra), // %1 195441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 195541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 195641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 195741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 195841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 195941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 196041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 196141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 196241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_BGRATOUVROW_NEON 196341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 196441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ABGRTOUVROW_NEON 196541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, 196641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 196741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 196841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_abgr 196941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 197041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 
197141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 197241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 197341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 197441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 197541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 197641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 197741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 197841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. 197941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 198041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. 198141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. 198241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 198341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. 198441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 198541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. 198641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 198741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. 198841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. 
198941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 199041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. 199141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 199241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q0, q0, #1 \n" // 2x average 199341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q1, q1, #1 \n" 199441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q2, q2, #1 \n" 199541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 199641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 32 processed per loop. 199741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGBTOUV(q2, q1, q0) 199841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 199941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 200041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 200141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
200241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 200341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_abgr), // %0 200441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_abgr), // %1 200541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 200641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 200741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 200841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 200941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 201041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 201141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 201241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 201341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ABGRTOUVROW_NEON 201441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 201541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGBATOUVROW_NEON 201641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, 201741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 201841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 201941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_rgba 202041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 202141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 
202241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 202341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 202441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 202541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 202641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 202741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 202841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 202941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. 203041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 203141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. 203241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. 203341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. 203441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. 203541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 203641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. 203741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 203841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. 203941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. 
204041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. 204141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts. 204241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 204341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q0, q0, #1 \n" // 2x average 204441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q1, q1, #1 \n" 204541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q2, q2, #1 \n" 204641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 204741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 32 processed per loop. 204841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGBTOUV(q0, q1, q2) 204941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 205041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 205141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 205241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
205341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 205441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgba), // %0 205541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_rgba), // %1 205641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 205741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 205841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 205941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 206041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 206141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 206241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 206341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 206441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGBATOUVROW_NEON 206541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 206641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB24TOUVROW_NEON 206741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, 206841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 206941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 207041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_rgb24 207141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 207241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 
207341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 207441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 207541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 207641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 207741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 207841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 207941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 208041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels. 208141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 208241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels. 208341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 208441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 208541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 208641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 208741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels. 208841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 208941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels. 209041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 
209141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 209241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 209341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 209441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q0, q0, #1 \n" // 2x average 209541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q1, q1, #1 \n" 209641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q2, q2, #1 \n" 209741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 209841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 32 processed per loop. 209941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGBTOUV(q0, q1, q2) 210041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 210141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 210241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 210341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
210441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 210541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgb24), // %0 210641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_rgb24), // %1 210741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 210841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 210941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 211041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 211141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 211241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 211341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 211441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 211541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGB24TOUVROW_NEON 211641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 211741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RAWTOUVROW_NEON 211841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, 211941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 212041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 212141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_raw 212241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 212341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 
212441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 212541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 212641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 212741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 212841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 212941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 213041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 213141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels. 213241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 213341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels. 213441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. 213541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 213641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. 213741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 213841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels. 213941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 214041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels. 214141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. 
214241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 214341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. 214441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 214541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q0, q0, #1 \n" // 2x average 214641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q1, q1, #1 \n" 214741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q2, q2, #1 \n" 214841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 214941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 32 processed per loop. 215041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGBTOUV(q2, q1, q0) 215141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 215241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 215341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 215441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
215541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 215641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_raw), // %0 215741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_raw), // %1 215841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 215941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 216041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 216141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 216241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 216341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 216441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 216541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 216641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RAWTOUVROW_NEON 216741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 216841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
#ifdef HAS_RGB565TOUVROW_NEON
// Reads two rows of RGB565 pixels and writes one row each of U and V.
//   src_rgb565         first source row.
//   src_stride_rgb565  byte offset to the second row; the first instruction
//                      replaces it with the second-row pointer.
//   dst_u, dst_v       each receives 8 bytes per loop iteration.
//   pix                pixel count, reduced by 16 per iteration; the loop
//                      repeats while the result is positive (presumably a
//                      positive multiple of 16 is expected -- confirm).
// Per channel, each 2x2 pixel block is summed (vpaddl + vpadal) and
// rounding-halved, then, in 16-bit arithmetic:
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// RGB565TOARGB is defined elsewhere; the code after it treats d0/d1/d2 as
// B/G/R.
// NOTE(review): these are ARMv7 NEON mnemonics (q/d registers) while the file
// is guarded by __aarch64__; presumably HAS_RGB565TOUVROW_NEON is left
// undefined until this is ported -- confirm.
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
                        uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n"                // src_stride + src_rgb565
    "vmov.s16 q10, #112 / 2 \n"        // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n"         // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n"         // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n"         // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n"         // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n"         // 128.5
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n"            // load 8 RGB565 pixels.
    RGB565TOARGB
    "vpaddl.u8 d8, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d10, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d12, d2 \n"             // R 8 bytes -> 4 shorts.
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n"            // next 8 RGB565 pixels.
    RGB565TOARGB
    "vpaddl.u8 d9, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d11, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d13, d2 \n"             // R 8 bytes -> 4 shorts.

    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"            // load 8 RGB565 pixels.
    RGB565TOARGB
    "vpadal.u8 d8, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpadal.u8 d10, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpadal.u8 d12, d2 \n"             // R 8 bytes -> 4 shorts.
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"            // next 8 RGB565 pixels.
    RGB565TOARGB
    "vpadal.u8 d9, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpadal.u8 d11, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpadal.u8 d13, d2 \n"             // R 8 bytes -> 4 shorts.

    "vrshr.u16 q4, q4, #1 \n"          // 2x average
    "vrshr.u16 q5, q5, #1 \n"
    "vrshr.u16 q6, q6, #1 \n"

    "subs %4, %4, #16 \n"              // 16 processed per loop.
    "vmul.s16 q8, q4, q10 \n"          // B
    "vmls.s16 q8, q5, q11 \n"          // G
    "vmls.s16 q8, q6, q12 \n"          // R
    "vadd.u16 q8, q8, q15 \n"          // +128 -> unsigned
    "vmul.s16 q9, q6, q10 \n"          // R
    "vmls.s16 q9, q5, q14 \n"          // G
    "vmls.s16 q9, q4, q13 \n"          // B
    "vadd.u16 q9, q9, q15 \n"          // +128 -> unsigned
    "vqshrn.u16 d0, q8, #8 \n"         // 16 bit to 8 bit U
    "vqshrn.u16 d1, q9, #8 \n"         // 16 bit to 8 bit V
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n"            // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n"            // store 8 pixels V.
    "bgt 1b \n"
  : "+r"(src_rgb565),         // %0
    "+r"(src_stride_rgb565),  // %1
    "+r"(dst_u),              // %2
    "+r"(dst_v),              // %3
    "+r"(pix)                 // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_RGB565TOUVROW_NEON

// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
#ifdef HAS_ARGB1555TOUVROW_NEON
// Reads two rows of ARGB1555 pixels and writes one row each of U and V.
//   src_argb1555         first source row.
//   src_stride_argb1555  byte offset to the second row; the first instruction
//                        replaces it with the second-row pointer.
//   dst_u, dst_v         each receives 8 bytes per loop iteration.
//   pix                  pixel count, reduced by 16 per iteration; the loop
//                        repeats while the result is positive (presumably a
//                        positive multiple of 16 is expected -- confirm).
// Per channel, each 2x2 pixel block is summed (vpaddl + vpadal) and
// rounding-halved, then, in 16-bit arithmetic:
//   U = (56 * B - 37 * G - 19 * R + 0x8080) >> 8
//   V = (56 * R - 47 * G -  9 * B + 0x8080) >> 8
// RGB555TOARGB is defined elsewhere; the code after it treats d0/d1/d2 as
// B/G/R.
// NOTE(review): these are ARMv7 NEON mnemonics (q/d registers) while the file
// is guarded by __aarch64__; presumably HAS_ARGB1555TOUVROW_NEON is left
// undefined until this is ported -- confirm.
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
                          uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n"                // src_stride + src_argb1555
    "vmov.s16 q10, #112 / 2 \n"        // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n"         // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n"         // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n"         // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n"         // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n"         // 128.5
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n"            // load 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpaddl.u8 d8, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d10, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d12, d2 \n"             // R 8 bytes -> 4 shorts.
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n"            // next 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpaddl.u8 d9, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d11, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d13, d2 \n"             // R 8 bytes -> 4 shorts.

    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"            // load 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpadal.u8 d8, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpadal.u8 d10, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpadal.u8 d12, d2 \n"             // R 8 bytes -> 4 shorts.
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n"            // next 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpadal.u8 d9, d0 \n"              // B 8 bytes -> 4 shorts.
    "vpadal.u8 d11, d1 \n"             // G 8 bytes -> 4 shorts.
    "vpadal.u8 d13, d2 \n"             // R 8 bytes -> 4 shorts.

    "vrshr.u16 q4, q4, #1 \n"          // 2x average
    "vrshr.u16 q5, q5, #1 \n"
    "vrshr.u16 q6, q6, #1 \n"

    "subs %4, %4, #16 \n"              // 16 processed per loop.
    "vmul.s16 q8, q4, q10 \n"          // B
    "vmls.s16 q8, q5, q11 \n"          // G
    "vmls.s16 q8, q6, q12 \n"          // R
    "vadd.u16 q8, q8, q15 \n"          // +128 -> unsigned
    "vmul.s16 q9, q6, q10 \n"          // R
    "vmls.s16 q9, q5, q14 \n"          // G
    "vmls.s16 q9, q4, q13 \n"          // B
    "vadd.u16 q9, q9, q15 \n"          // +128 -> unsigned
    "vqshrn.u16 d0, q8, #8 \n"         // 16 bit to 8 bit U
    "vqshrn.u16 d1, q9, #8 \n"         // 16 bit to 8 bit V
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n"            // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n"            // store 8 pixels V.
    "bgt 1b \n"
  : "+r"(src_argb1555),         // %0
    "+r"(src_stride_argb1555),  // %1
    "+r"(dst_u),                // %2
    "+r"(dst_v),                // %3
    "+r"(pix)                   // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
#endif  // HAS_ARGB1555TOUVROW_NEON

// 16x2 pixels -> 8x1.  pix is number of argb pixels. e.g. 16.
231341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB4444TOUVROW_NEON 231441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, 231541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_u, uint8* dst_v, int pix) { 231641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 231741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %1, %0, %1 \n" // src_stride + src_argb 231841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 231941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 232041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 232141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 232241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 232341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u16 q15, #0x8080 \n" // 128.5 232441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 232541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 232641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 232741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 232841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB4444TOARGB 232941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 233041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 
233141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 233241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 233341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. 233441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB4444TOARGB 233541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 233641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 233741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 233841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 233941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 234041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. 234141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB4444TOARGB 234241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 234341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 234441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 234541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 234641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. 234741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB4444TOARGB 234841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 
234941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 235041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 235141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 235241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q4, q4, #1 \n" // 2x average 235341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q5, q5, #1 \n" 235441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshr.u16 q6, q6, #1 \n" 235541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 235641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #16 \n" // 16 processed per loop. 235741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q8, q4, q10 \n" // B 235841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmls.s16 q8, q5, q11 \n" // G 235941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmls.s16 q8, q6, q12 \n" // R 236041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 236141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q9, q6, q10 \n" // R 236241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmls.s16 q9, q5, q14 \n" // G 236341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmls.s16 q9, q4, q13 \n" // B 236441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 236541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 236641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 236741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 
236841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 236941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 237041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 237141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 237241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb4444), // %0 237341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride_argb4444), // %1 237441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_u), // %2 237541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_v), // %3 237641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %4 237741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 237841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 237941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 238041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 238141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 238241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGB4444TOUVROW_NEON 238341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 238441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB565TOYROW_NEON 238541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { 238641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 238741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 
238841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 238941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 239041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d27, #16 \n" // Add 16 constant 239141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 239241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 239341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 239441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 239541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 239641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org RGB565TOARGB 239741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q2, d0, d24 \n" // B 239841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d1, d25 \n" // G 239941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d2, d26 \n" // R 240041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 240141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d27 \n" 240241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 240341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
240441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 240541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgb565), // %0 240641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 240741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 240841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 240941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 241041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 241141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 241241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGB565TOYROW_NEON 241341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 241441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB1555TOYROW_NEON 241541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) { 241641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 241741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 241841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 241941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 242041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d27, #16 \n" // Add 16 constant 242141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 242241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 242341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 242441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! 
\n" // load 8 ARGB1555 pixels. 242541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 242641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB1555TOARGB 242741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q2, d0, d24 \n" // B 242841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d1, d25 \n" // G 242941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d2, d26 \n" // R 243041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 243141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d27 \n" 243241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 243341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 243441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 243541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb1555), // %0 243641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 243741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 243841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 243941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 244041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 244141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 244241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGB1555TOYROW_NEON 244341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 244441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGB4444TOYROW_NEON 244541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid 
ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) { 244641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 244741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 244841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 244941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 245041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d27, #16 \n" // Add 16 constant 245141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 245241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 245341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 245441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 245541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 245641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ARGB4444TOARGB 245741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q2, d0, d24 \n" // B 245841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d1, d25 \n" // G 245941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d2, d26 \n" // R 246041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 246141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d27 \n" 246241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 246341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
246441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 246541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb4444), // %0 246641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 246741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 246841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 246941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 247041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 247141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 247241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGB4444TOYROW_NEON 247341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 247441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_BGRATOYROW_NEON 247541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { 247641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 247741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 247841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 247941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 248041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d7, #16 \n" // Add 16 constant 248141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 248241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 248341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 248441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, [%0]! 
\n" // load 8 pixels of BGRA. 248541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 248641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q8, d1, d4 \n" // R 248741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d2, d5 \n" // G 248841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d3, d6 \n" // B 248941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 249041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d7 \n" 249141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 249241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 249341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 249441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_bgra), // %0 249541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 249641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 249741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 249841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 249941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 250041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 250141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_BGRATOYROW_NEON 250241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 250341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ABGRTOYROW_NEON 250441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { 
250541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 250641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 250741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 250841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 250941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d7, #16 \n" // Add 16 constant 251041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 251141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 251241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 251341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. 251441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 251541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q8, d0, d4 \n" // R 251641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d1, d5 \n" // G 251741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d2, d6 \n" // B 251841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 251941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d7 \n" 252041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 252141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
252241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 252341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_abgr), // %0 252441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 252541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 252641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 252741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 252841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 252941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 253041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ABGRTOYROW_NEON 253141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 253241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGBATOYROW_NEON 253341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { 253441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 253541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient 253641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 253741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient 253841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d7, #16 \n" // Add 16 constant 253941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 254041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 254141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 254241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, 
[%0]! \n" // load 8 pixels of RGBA. 254341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 254441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q8, d1, d4 \n" // B 254541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d2, d5 \n" // G 254641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d3, d6 \n" // R 254741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 254841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d7 \n" 254941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 255041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 255141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 255241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgba), // %0 255341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 255441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 255541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 255641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 255741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 255841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 255941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGBATOYROW_NEON 256041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 256141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RGB24TOYROW_NEON 256241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { 
256341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 256441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient 256541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 256641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient 256741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d7, #16 \n" // Add 16 constant 256841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 256941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 257041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 257141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. 257241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 257341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q8, d0, d4 \n" // B 257441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d1, d5 \n" // G 257541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d2, d6 \n" // R 257641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 257741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d7 \n" 257841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 257941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
258041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 258141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_rgb24), // %0 258241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 258341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 258441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 258541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 258641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 258741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 258841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RGB24TOYROW_NEON 258941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 259041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_RAWTOYROW_NEON 259141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { 259241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 259341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 259441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 259541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 259641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d7, #16 \n" // Add 16 constant 259741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 259841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 259941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 260041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld3.8 {d0, d1, d2}, 
[%0]! \n" // load 8 pixels of RAW. 260141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 260241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q8, d0, d4 \n" // B 260341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d1, d5 \n" // G 260441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d2, d6 \n" // R 260541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 260641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d0, d7 \n" 260741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 260841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 260941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 261041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_raw), // %0 261141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_y), // %1 261241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(pix) // %2 261341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 261441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 261541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 261641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 261741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_RAWTOYROW_NEON 261841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 261941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Bilinear filter 16x2 -> 16x1 262041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_INTERPOLATEROW_NEON 
262141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid InterpolateRow_NEON(uint8* dst_ptr, 262241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_ptr, ptrdiff_t src_stride, 262341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int dst_width, int source_y_fraction) { 262441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 262541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "cmp %4, #0 \n" 262641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "beq 100f \n" 262741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "add %2, %1 \n" 262841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "cmp %4, #64 \n" 262941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "beq 75f \n" 263041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "cmp %4, #128 \n" 263141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "beq 50f \n" 263241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "cmp %4, #192 \n" 263341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "beq 25f \n" 263441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 263541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vdup.8 d5, %4 \n" 263641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "rsb %4, #256 \n" 263741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vdup.8 d4, %4 \n" 263841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // General purpose row blend. 263941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 264041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 264141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%1]! 
\n" 264241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 264341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q1}, [%2]! \n" 264441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" 264541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q13, d0, d4 \n" 264641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q14, d1, d4 \n" 264741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q13, d2, d5 \n" 264841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q14, d3, d5 \n" 264941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshrn.u16 d0, q13, #8 \n" 265041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrshrn.u16 d1, q14, #8 \n" 265141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 265241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%0]! \n" 265341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 265441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "b 99f \n" 265541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 265641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Blend 25 / 75. 265741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "25: \n" 265841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 265941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%1]! \n" 266041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 266141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q1}, [%2]! 
\n" 266241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" 266341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrhadd.u8 q0, q1 \n" 266441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrhadd.u8 q0, q1 \n" 266541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 266641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%0]! \n" 266741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 25b \n" 266841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "b 99f \n" 266941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 267041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Blend 50 / 50. 267141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "50: \n" 267241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 267341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%1]! \n" 267441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 267541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q1}, [%2]! \n" 267641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" 267741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrhadd.u8 q0, q1 \n" 267841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 267941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%0]! \n" 268041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 50b \n" 268141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "b 99f \n" 268241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 268341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Blend 75 / 25. 
268441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "75: \n" 268541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 268641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q1}, [%1]! \n" 268741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 268841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%2]! \n" 268941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" 269041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrhadd.u8 q0, q1 \n" 269141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vrhadd.u8 q0, q1 \n" 269241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 269341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%0]! \n" 269441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 75b \n" 269541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "b 99f \n" 269641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 269741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Blend 100 / 0 - Copy row unchanged. 269841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "100: \n" 269941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 270041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q0}, [%1]! \n" 270141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #16 \n" 270241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 270341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst1.8 {q0}, [%0]! 
\n" 270441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 100b \n" 270541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 270641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "99: \n" 270741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(dst_ptr), // %0 270841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_ptr), // %1 270941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_stride), // %2 271041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_width), // %3 271141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(source_y_fraction) // %4 271241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 271341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" 271441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 271541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 271641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_INTERPOLATEROW_NEON 271741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 271841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr 271941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBBLENDROW_NEON 272041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 272141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, int width) { 272241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 272341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, #8 \n" 272441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "blt 89f \n" 
272541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Blend 8 pixels. 272641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "8: \n" 272741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 272841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0. 272941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 273041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. 273141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" // 8 processed per loop. 273241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q10, d4, d3 \n" // db * a 273341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q11, d5, d3 \n" // dg * a 273441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q12, d6, d3 \n" // dr * a 273541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 273641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 273741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 273841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 273941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 274041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 q0, q0, q2 \n" // + sbg 274141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d2, d2, d6 \n" // + sr 274241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, #255 \n" // a = 255 274341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 
274441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. 274541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bge 8b \n" 274641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 274741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "89: \n" 274841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "adds %3, #8-1 \n" 274941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "blt 99f \n" 275041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 275141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Blend 1 pixels. 275241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 275341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 275441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. 275541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 275641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. 275741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #1 \n" // 1 processed per loop. 
275841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q10, d4, d3 \n" // db * a 275941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q11, d5, d3 \n" // dg * a 276041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q12, d6, d3 \n" // dr * a 276141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 276241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 276341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 276441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 276541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 276641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 q0, q0, q2 \n" // + sbg 276741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.u8 d2, d2, d6 \n" // + sr 276841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d3, #255 \n" // a = 255 276941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 277041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. 
277141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bge 1b \n" 277241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 277341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "99: \n" 277441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 277541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb0), // %0 277641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_argb1), // %1 277741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %2 277841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 277941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 278041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12" 278141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 278241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 278341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBBLENDROW_NEON 278441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 278541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Attenuate 8 pixels at a time. 278641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBATTENUATEROW_NEON 278741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 278841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 278941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // Attenuate 8 pixels. 279041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 279141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 279241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, [%0]! 
\n" // load 8 pixels of ARGB. 279341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 279441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q10, d0, d3 \n" // b * a 279541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q11, d1, d3 \n" // g * a 279641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q12, d2, d3 \n" // r * a 279741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 279841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 279941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 280041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 280141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 280241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 280341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 280441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 280541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 280641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 280741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q10", "q11", "q12" 280841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 280941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 281041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBATTENUATEROW_NEON 281141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 281241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Quantize 8 ARGB pixels (32 bytes). 
281341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// dst = (dst * scale >> 16) * interval_size + interval_offset; 281441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBQUANTIZEROW_NEON 281541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, 281641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org int interval_offset, int width) { 281741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 281841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vdup.u16 q8, %2 \n" 281941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u16 q8, q8, #1 \n" // scale >>= 1 282041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vdup.u16 q9, %3 \n" // interval multiply. 282141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vdup.u16 q10, %4 \n" // interval add 282241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 282341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // 8 pixel loop. 282441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 282541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 282641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 282741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. 282841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %1, %1, #8 \n" // 8 processed per loop. 282941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q0, d0 \n" // b (0 .. 
255) 283041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q1, d2 \n" 283141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q2, d4 \n" 283241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqdmulh.s16 q0, q0, q8 \n" // b * scale 283341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqdmulh.s16 q1, q1, q8 \n" // g 283441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqdmulh.s16 q2, q2, q8 \n" // r 283541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.u16 q0, q0, q9 \n" // b * interval_size 283641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.u16 q1, q1, q9 \n" // g 283741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.u16 q2, q2, q9 \n" // r 283841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vadd.u16 q0, q0, q10 \n" // b + interval_offset 283941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vadd.u16 q1, q1, q10 \n" // g 284041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vadd.u16 q2, q2, q10 \n" // r 284141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d0, q0 \n" 284241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d2, q1 \n" 284341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d4, q2 \n" 284441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 284541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB. 
284641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 284741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(dst_argb), // %0 284841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %1 284941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(scale), // %2 285041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(interval_size), // %3 285141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(interval_offset) // %4 285241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10" 285341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 285441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 285541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBQUANTIZEROW_NEON 285641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 285741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Shade 8 pixels at a time by specified value. 285841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. 285941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. 286041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSHADEROW_NEON 286141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, 286241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint32 value) { 286341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 286441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vdup.u32 q0, %3 \n" // duplicate scale value. 
286541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vzip.u8 d0, d1 \n" // d0 aarrggbb. 286641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vshr.u16 q0, q0, #1 \n" // scale / 2. 286741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 286841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // 8 pixel loop. 286941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 287041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 287141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 287241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB. 287341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 287441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q10, d20 \n" // b (0 .. 255) 287541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q11, d22 \n" 287641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q12, d24 \n" 287741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q13, d26 \n" 287841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2 287941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrdmulh.s16 q11, q11, d0[1] \n" // g 288041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrdmulh.s16 q12, q12, d0[2] \n" // r 288141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrdmulh.s16 q13, q13, d0[3] \n" // a 288241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d20, q10 \n" 288341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d22, q11 \n" 288441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d24, q12 \n" 
288541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqmovn.u16 d26, q13 \n" 288641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 288741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB. 288841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 288941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 289041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 289141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 289241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(value) // %3 289341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q10", "q11", "q12", "q13" 289441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 289541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 289641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBSHADEROW_NEON 289741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 289841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels 289941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Similar to ARGBToYJ but stores ARGB. 
290041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; 290141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBGRAYROW_NEON 290241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 290341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 290441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient 290541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient 290641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient 290741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 290841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 290941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 291041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 291141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 
291241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q2, d0, d24 \n" // B 291341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d1, d25 \n" // G 291441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d2, d26 \n" // R 291541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B 291641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov d1, d0 \n" // G 291741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov d2, d0 \n" // R 291841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 291941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels. 292041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 292141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 292241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 292341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 292441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 292541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 292641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 292741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 292841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBGRAYROW_NEON 292941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 293041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
293141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// b = (r * 35 + g * 68 + b * 17) >> 7 293241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// g = (r * 45 + g * 88 + b * 22) >> 7 293341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// r = (r * 50 + g * 98 + b * 24) >> 7 293441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 293541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSEPIAROW_NEON 293641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) { 293741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 293841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d20, #17 \n" // BB coefficient 293941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d21, #68 \n" // BG coefficient 294041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d22, #35 \n" // BR coefficient 294141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d24, #22 \n" // GB coefficient 294241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d25, #88 \n" // GG coefficient 294341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d26, #45 \n" // GR coefficient 294441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d28, #24 \n" // BB coefficient 294541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d29, #98 \n" // BG coefficient 294641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmov.u8 d30, #50 \n" // BR coefficient 294741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 294841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 294941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 
295041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels. 295141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %1, %1, #8 \n" // 8 processed per loop. 295241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q2, d0, d20 \n" // B to Sepia B 295341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d1, d21 \n" // G 295441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q2, d2, d22 \n" // R 295541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q3, d0, d24 \n" // B to Sepia G 295641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q3, d1, d25 \n" // G 295741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q3, d2, d26 \n" // R 295841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmull.u8 q8, d0, d28 \n" // B to Sepia R 295941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d1, d29 \n" // G 296041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmlal.u8 q8, d2, d30 \n" // R 296141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B 296241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G 296341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R 296441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 296541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. 
296641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 296741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(dst_argb), // %0 296841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %1 296941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 297041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", 297141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q10", "q11", "q12", "q13", "q14", "q15" 297241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 297341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 297441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBSEPIAROW_NEON 297541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 297641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Tranform 8 ARGB pixels (32 bytes) with color matrix. 297741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): Was same as Sepia except matrix is provided. This function 297841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// needs to saturate. Consider doing a non-saturating version. 297941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBCOLORMATRIXROW_NEON 298041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, 298141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const int8* matrix_argb, int width) { 298241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 298341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 298441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. 
298541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.s8 q0, d4 \n" // B,G coefficients s16. 298641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.s8 q1, d5 \n" // R,A coefficients s16. 298741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 298841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 298941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 299041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 299141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. 299241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %2, %2, #8 \n" // 8 processed per loop. 299341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit 299441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q9, d18 \n" // g 299541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q10, d20 \n" // r 299641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmovl.u8 q15, d22 \n" // a 299741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B 299841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G 299941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R 300041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A 300141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B 300241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G 300341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 
q6, q9, d2[1] \n" // R += G * Matrix R 300441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A 300541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q12, q12, q4 \n" // Accumulate B 300641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q13, q13, q5 \n" // Accumulate G 300741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q14, q14, q6 \n" // Accumulate R 300841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q15, q15, q7 \n" // Accumulate A 300941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B 301041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G 301141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R 301241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A 301341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q12, q12, q4 \n" // Accumulate B 301441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q13, q13, q5 \n" // Accumulate G 301541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q14, q14, q6 \n" // Accumulate R 301641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q15, q15, q7 \n" // Accumulate A 301741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B 301841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G 301941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R 302041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vmul.s16 q7, q15, 
d3[3] \n" // A += A * Matrix A 302141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q12, q12, q4 \n" // Accumulate B 302241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q13, q13, q5 \n" // Accumulate G 302341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q14, q14, q6 \n" // Accumulate R 302441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqadd.s16 q15, q15, q7 \n" // Accumulate A 302541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B 302641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G 302741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R 302841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A 302941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 303041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. 
303141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 303241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb), // %0 303341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %1 303441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %2 303541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(matrix_argb) // %3 303641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", 303741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "q10", "q11", "q12", "q13", "q14", "q15" 303841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 303941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 304041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBCOLORMATRIXROW_NEON 304141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 304241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. 304341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. 304441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBMULTIPLYROW_NEON 304541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 304641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, int width) { 304741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 304841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // 8 pixel loop. 
304941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 305041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 305141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3052d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 305341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3054d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels. 305541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" // 8 processed per loop. 3056d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "umull v0.8h, v0.8b, v4.8b \n" // multiply B 3057d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "umull v1.8h, v1.8b, v5.8b \n" // multiply G 3058d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "umull v2.8h, v2.8b, v6.8b \n" // multiply R 3059d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "umull v3.8h, v3.8b, v7.8b \n" // multiply A 3060d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B 3061d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G 3062d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R 3063d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A 306441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 3065d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels. 
306641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 306741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 306841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb0), // %0 306941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_argb1), // %1 307041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %2 307141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 307241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 3073d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 307441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 307541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 307641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBMULTIPLYROW_NEON 307741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 307841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Add 2 rows of ARGB pixels together, 8 pixels at a time. 307941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBADDROW_NEON 308041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 308141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, int width) { 308241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 308341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // 8 pixel loop. 
308441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 308541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 308641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3087d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 308841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3089d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels. 309041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" // 8 processed per loop. 3091d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqadd v0.8b, v0.8b, v4.8b \n" 3092d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqadd v1.8b, v1.8b, v5.8b \n" 3093d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqadd v2.8b, v2.8b, v6.8b \n" 3094d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqadd v3.8b, v3.8b, v7.8b \n" 309541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 3096d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels. 
309741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 309841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 309941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb0), // %0 310041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_argb1), // %1 310141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %2 310241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 310341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 3104d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 310541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 310641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 310741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBADDROW_NEON 310841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 310941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Subtract 2 rows of ARGB pixels, 8 pixels at a time. 311041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_ARGBSUBTRACTROW_NEON 311141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 311241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_argb, int width) { 311341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 311441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org // 8 pixel loop. 
311541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 311641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 311741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3118d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v0.8b-v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 311941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3120d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld4 {v4.8b-v7.8b}, [%1], #32 \n" // load 8 more ARGB pixels. 312141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" // 8 processed per loop. 3122d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqsub v0.8b, v0.8b, v4.8b \n" 3123d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqsub v1.8b, v1.8b, v5.8b \n" 3124d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqsub v2.8b, v2.8b, v6.8b \n" 3125d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqsub v3.8b, v3.8b, v7.8b \n" 312641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 3127d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st4 {v0.8b-v3.8b}, [%2], #32 \n" // store 8 ARGB pixels. 
312841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 312941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 313041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_argb0), // %0 313141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_argb1), // %1 313241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_argb), // %2 313341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 313441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : 3135d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 313641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 313741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 313841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_ARGBSUBTRACTROW_NEON 313941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 314041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// Adds Sobel X and Sobel Y and stores Sobel into ARGB. 
// A = 255
// R = Sobel
// G = Sobel
// B = Sobel
// Sobel here is the unsigned saturating sum (uqadd) of the two input bytes.
// Consumes 8 bytes from each input and writes 8 ARGB pixels (32 bytes) per
// loop pass; the loop body runs before the counter is tested.
#ifdef HAS_SOBELROW_NEON
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                   uint8* dst_argb, int width) {
  asm volatile (
    "movi       v3.8b, #255                    \n"  // alpha
    // 8 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.8b}, [%0], #8              \n"  // load 8 sobelx.
    MEMACCESS(1)
    "ld1        {v1.8b}, [%1], #8              \n"  // load 8 sobely.
    // width is a 32 bit int: the upper half of its register is unspecified,
    // so the counter must be updated (and flags set) with the W register.
    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
    "uqadd      v0.8b, v0.8b, v1.8b            \n"  // add
    "mov        v1.8b, v0.8b                   \n"  // replicate into G
    "mov        v2.8b, v0.8b                   \n"  // replicate into R
    MEMACCESS(2)
    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
    "bgt        1b                             \n"
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_argb),    // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_SOBELROW_NEON

// Adds Sobel X and Sobel Y and stores Sobel into plane.
#ifdef HAS_SOBELTOPLANEROW_NEON
// Writes the unsigned saturating sum (uqadd) of src_sobelx and src_sobely to
// dst_y, 16 bytes per loop pass. The loop body runs before the counter is
// tested.
void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                          uint8* dst_y, int width) {
  asm volatile (
    // 16 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], #16            \n"  // load 16 sobelx.
    MEMACCESS(1)
    "ld1        {v1.16b}, [%1], #16            \n"  // load 16 sobely.
    // width is a 32 bit int: the upper half of its register is unspecified,
    // so the counter must be updated (and flags set) with the W register.
    "subs       %w3, %w3, #16                  \n"  // 16 processed per loop.
    "uqadd      v0.16b, v0.16b, v1.16b         \n"  // add
    MEMACCESS(2)
    "st1        {v0.16b}, [%2], #16            \n"  // store 16 pixels.
    "bgt        1b                             \n"
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_y),       // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "v0", "v1"
  );
}
#endif  // HAS_SOBELTOPLANEROW_NEON

// Mixes Sobel X, Sobel Y and Sobel into ARGB.
// A = 255
// R = Sobel X
// G = Sobel
// B = Sobel Y
// Sobel is the unsigned saturating sum (uqadd) of Sobel X and Sobel Y.
// The inputs are loaded straight into the B (v0) and R (v2) lanes so only the
// G lane (v1) has to be computed before the interleaving store.
#ifdef HAS_SOBELXYROW_NEON
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width) {
  asm volatile (
    "movi       v3.8b, #255                    \n"  // alpha
    // 8 pixel loop.
    ".p2align   2                              \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v2.8b}, [%0], #8              \n"  // load 8 sobelx.
    MEMACCESS(1)
    "ld1        {v0.8b}, [%1], #8              \n"  // load 8 sobely.
    // 32 bit counter: use the W register so bgt sees flags from the int.
    "subs       %w3, %w3, #8                   \n"  // 8 processed per loop.
    "uqadd      v1.8b, v0.8b, v2.8b            \n"  // add
    MEMACCESS(2)
    "st4        {v0.8b-v3.8b}, [%2], #32       \n"  // store 8 ARGB pixels.
    "bgt        1b                             \n"
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_argb),    // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "v0", "v1", "v2", "v3"
  );
}
#endif  // HAS_SOBELXYROW_NEON

323341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// SobelX as a matrix is 323441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// -1 0 1 323541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// -2 0 2 323641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// -1 0 1 323741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_SOBELXROW_NEON 323841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, 323941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org const uint8* src_y2, uint8* dst_sobelx, int width) { 324041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 324141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 324241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 324341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3244d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v0.8b}, [%0],%5 \n" // top 324541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3246d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v1.8b}, [%0],%6 \n" 3247d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "usubl v0.8h, v0.8b, v1.8b \n" 324841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3249d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v2.8b}, [%1],%5 \n" // center * 2 325041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3251d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v3.8b}, [%1],%6 \n" 3252d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "usubl v1.8h, v2.8b, v3.8b \n" 3253d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add v0.8h, v0.8h, v1.8h \n" 
3254d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add v0.8h, v0.8h, v1.8h \n" 325541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 3256d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v2.8b}, [%2],%5 \n" // bottom 325741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 3258d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v3.8b}, [%2],%6 \n" 325941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %4, %4, #8 \n" // 8 pixels 3260d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "usubl v1.8h, v2.8b, v3.8b \n" 3261d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add v0.8h, v0.8h, v1.8h \n" 3262d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "abs v0.8h, v0.8h \n" 3263d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqxtn v0.8b, v0.8h \n" 326441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(3) 3265d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx 326641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 326741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y0), // %0 326841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_y1), // %1 326941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_y2), // %2 327041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_sobelx), // %3 327141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %4 327241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(2), // %5 327341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(6) // %6 3274d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 
327541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 327641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 327741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_SOBELXROW_NEON 327841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 327941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// SobelY as a matrix is 328041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// -1 -2 -1 328141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 0 0 0 328241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org// 1 2 1 328341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef HAS_SOBELYROW_NEON 328441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.orgvoid SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, 328541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org uint8* dst_sobely, int width) { 328641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org asm volatile ( 328741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ".p2align 2 \n" 328841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "1: \n" 328941294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3290d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v0.8b}, [%0],%4 \n" // left 329141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3292d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v1.8b}, [%1],%4 \n" 3293d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "usubl v0.8h, v0.8b, v1.8b \n" 329441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3295d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v2.8b}, [%0],%4 \n" // center * 2 329641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 
3297d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v3.8b}, [%1],%4 \n" 3298d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "usubl v1.8h, v2.8b, v3.8b \n" 3299d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add v0.8h, v0.8h, v1.8h \n" 3300d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add v0.8h, v0.8h, v1.8h \n" 330141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(0) 3302d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v2.8b}, [%0],%5 \n" // right 330341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(1) 3304d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "ld1 {v3.8b}, [%1],%5 \n" 330541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "subs %3, %3, #8 \n" // 8 pixels 3306d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "usubl v1.8h, v2.8b, v3.8b \n" 3307d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "add v0.8h, v0.8h, v1.8h \n" 3308d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "abs v0.8h, v0.8h \n" 3309d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "uqxtn v0.8b, v0.8h \n" 331041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org MEMACCESS(2) 3311d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely 331241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "bgt 1b \n" 331341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "+r"(src_y0), // %0 331441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(src_y1), // %1 331541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(dst_sobely), // %2 331641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "+r"(width) // %3 331741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org : "r"(1), // %4 
331841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org "r"(6) // %5 3319d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 332041294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org ); 332141294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} 332241294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // HAS_SOBELYROW_NEON 332341294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 332441294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org 332541294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#ifdef __cplusplus 332641294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} // extern "C" 332741294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org} // namespace libyuv 332841294d96d7dbf9bc215b09832a8336c5fb158f0bjohannkoenig@chromium.org#endif 3329