1ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright 2014 The LibYuv Project Authors. All rights reserved. 3ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * 4ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian */ 10ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 11ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include "libyuv/row.h" 12ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 13ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#ifdef __cplusplus 14ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramaniannamespace libyuv { 15ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianextern "C" { 16ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif 17ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian// This module is for GCC Neon armv8 64 bit. 
19ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 20ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 21ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 Y, 4 U and 4 V from 422 22ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READYUV422 \ 23ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" \ 25ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) \ 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.s}[0], [%1], #4 \n" \ 27ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) \ 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.s}[1], [%2], #4 \n" 29ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 30ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 Y, 2 U and 2 V from 422 31ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READYUV411 \ 32ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" \ 34ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) \ 35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.h}[0], [%1], #2 \n" \ 36ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) \ 37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.h}[1], [%2], #2 \n" \ 38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "zip1 v1.8b, v2.8b, v2.8b \n" 39ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 40ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 Y, 8 U 
and 8 V from 444 41ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READYUV444 \ 42ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" \ 44ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) \ 45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.d}[0], [%1], #8 \n" \ 46ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) \ 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.d}[1], [%2], #8 \n" \ 48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v1.16b \n" \ 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn v1.8b, v1.8h, #1 \n" 50ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 51ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 Y, and set 4 U and 4 V to 128 52ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READYUV400 \ 53ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" \ 55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v1.8b , #128 \n" 56ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 57ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 Y and 4 UV from NV12 58ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READNV12 \ 59ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" \ 61ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) \ 62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%1], 
#8 \n" \ 63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp1 v1.8b, v2.8b, v2.8b \n" \ 64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp2 v3.8b, v2.8b, v2.8b \n" \ 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v1.s[1], v3.s[0] \n" 66ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 67ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 Y and 4 VU from NV21 68ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READNV21 \ 69ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" \ 71ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) \ 72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%1], #8 \n" \ 73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp1 v3.8b, v2.8b, v2.8b \n" \ 74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp2 v1.8b, v2.8b, v2.8b \n" \ 75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v1.s[1], v3.s[0] \n" 76ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 77ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 YUY2 78ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READYUY2 \ 79ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \ 81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp2 v3.8b, v1.8b, v1.8b \n" \ 82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp1 v1.8b, v1.8b, v1.8b \n" \ 83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v1.s[1], v3.s[0] \n" 84ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanian 85ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Read 8 UYVY 86ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define READUYVY \ 87ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) \ 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \ 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v0.8b, v3.8b, v3.8b \n" \ 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp1 v1.8b, v2.8b, v2.8b \n" \ 91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uzp2 v3.8b, v2.8b, v2.8b \n" \ 92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v1.s[1], v3.s[0] \n" 93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 947bc9febe8749e98a3812a0dc4380ceae75c29450Johann#define YUVTORGB_SETUP \ 95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ 96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ 97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ 98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1r {v31.4s}, [%[kYToRgb]] \n" \ 997bc9febe8749e98a3812a0dc4380ceae75c29450Johann "ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \ 1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann "ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n" 101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann#define YUVTORGB(vR, vG, vB) \ 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ 104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ 105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushll2 v3.4s, 
v0.8h, #0 \n" /* Y */ \ 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushll v0.4s, v0.4h, #0 \n" \ 107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v3.4s, v3.4s, v31.4s \n" \ 108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v0.4s, v0.4s, v31.4s \n" \ 109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun v0.4h, v0.4s, #16 \n" \ 110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \ 111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \ 112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mov v2.d[0], v1.d[1] \n" /* Extract V */ \ 113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v2.8h, v2.8b \n" \ 114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v1.8h, v1.8b \n" /* Extract U */ \ 115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v3.8h, v1.8h, v27.8h \n" \ 116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v5.8h, v1.8h, v29.8h \n" \ 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v6.8h, v2.8h, v30.8h \n" \ 118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v7.8h, v2.8h, v28.8h \n" \ 119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v6.8h, v6.8h, v5.8h \n" \ 120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ 121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ 122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ 123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ 
124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ 125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ 126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ 127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ 128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ 129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 130ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I444ToARGBRow_NEON(const uint8* src_y, 131ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 132ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 133ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 135ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 136ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1377bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" /* A */ 139ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 140ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV444 1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w4, %w4, #8 \n" 143ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 
145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 146ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 147ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 148ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 149ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %3 150ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 157ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 158ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 159ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 160ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToARGBRow_NEON(const uint8* src_y, 161ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 162ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 163ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 165ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 166ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanian asm volatile ( 1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v23.8b, #255 \n" /* A */ 169ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1707bc9febe8749e98a3812a0dc4380ceae75c29450Johann READYUV422 1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 173ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 176ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 177ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 178ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 179ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %3 180ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 1817bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 1827bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 1837bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 1847bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 187ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 
188ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 189ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1907bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid I422AlphaToARGBRow_NEON(const uint8* src_y, 1917bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8* src_u, 1927bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8* src_v, 1937bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8* src_a, 1947bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_argb, 1957bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 1967bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 197ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1987bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 199ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 200ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV422 2017bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 202ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 2037bc9febe8749e98a3812a0dc4380ceae75c29450Johann "ld1 {v23.8b}, [%3], #8 \n" 2047bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w5, %w5, #8 \n" 2057bc9febe8749e98a3812a0dc4380ceae75c29450Johann MEMACCESS(4) 2067bc9febe8749e98a3812a0dc4380ceae75c29450Johann "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n" 207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 208ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 209ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 210ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 2117bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(src_a), // %3 2127bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(dst_argb), // %4 2137bc9febe8749e98a3812a0dc4380ceae75c29450Johann 
"+r"(width) // %5 2147bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 2157bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 2167bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 2177bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 220ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 221ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 222ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2237bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid I411ToARGBRow_NEON(const uint8* src_y, 224ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 225ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 2267bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_argb, 2277bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 228ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 229ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2307bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 2317bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" /* A */ 232ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2337bc9febe8749e98a3812a0dc4380ceae75c29450Johann READYUV411 2347bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 236ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 
MEMACCESS(3) 237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 239ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 240ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 241ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 2427bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(dst_argb), // %3 243ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 2447bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 2457bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 2467bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 2477bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 250ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 251ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 252ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 253ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToRGBARow_NEON(const uint8* src_y, 254ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 255ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 256ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_rgba, 2577bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 
258ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 259ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2607bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 2617bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v20.8b, #255 \n" /* A */ 262ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 263ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV422 2647bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v23, v22, v21) 265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 266ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 269ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 270ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 271ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 272ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_rgba), // %3 273ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 2747bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 2757bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 2767bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 2777bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", 
"v28", "v29", "v30" 280ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 281ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 282ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 283ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToRGB24Row_NEON(const uint8* src_y, 284ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 285ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 286ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_rgb24, 2877bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 288ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 289ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2907bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 291ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 292ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV422 2937bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 295ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 296da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" 297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "+r"(src_y), // %0 299da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_u), // %1 300da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_v), // %2 301da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(dst_rgb24), // %3 302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(width) // %4 
3037bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 3047bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 3057bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 3067bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 307da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 309ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 310ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 311ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 312ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define ARGBTORGB565 \ 313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shll v0.8h, v22.8b, #8 \n" /* R */ \ 314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shll v21.8h, v21.8b, #8 \n" /* G */ \ 3157bc9febe8749e98a3812a0dc4380ceae75c29450Johann "shll v20.8h, v20.8b, #8 \n" /* B */ \ 316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sri v0.8h, v21.8h, #5 \n" /* RG */ \ 317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sri v0.8h, v20.8h, #11 \n" /* RGB */ 318ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 319ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToRGB565Row_NEON(const uint8* src_y, 320ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 321ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 322ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_rgb565, 3237bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* 
yuvconstants, 324ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 325ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 3267bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 327ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 328ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV422 3297bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 330da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 331ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTORGB565 332ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. 334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 335ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 336ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 337ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 338ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_rgb565), // %3 339ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 3407bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 3417bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 3427bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 3437bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", 
"v25", "v26", "v27", "v28", "v29", "v30" 346ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 347ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 348ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 349ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define ARGBTOARGB1555 \ 350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shll v0.8h, v23.8b, #8 \n" /* A */ \ 351da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shll v22.8h, v22.8b, #8 \n" /* R */ \ 352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shll v21.8h, v21.8b, #8 \n" /* G */ \ 3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann "shll v20.8h, v20.8b, #8 \n" /* B */ \ 354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sri v0.8h, v22.8h, #1 \n" /* AR */ \ 355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ 356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sri v0.8h, v20.8h, #11 \n" /* ARGB */ 357ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 358ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToARGB1555Row_NEON(const uint8* src_y, 359ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 360ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 361ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb1555, 3627bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 363ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 364ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 3657bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 3667bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" 367ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanian "1: \n" 368ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV422 3697bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 371ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTOARGB1555 372ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. 374da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 375ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 376ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 377ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 378ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb1555), // %3 379ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 3807bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 3817bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 3827bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 3837bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 386ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 387ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 388ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 
389ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define ARGBTOARGB4444 \ 390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ 391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v20.8b, v20.8b, #4 \n" /* B */ \ 392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ 393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v22.8b, v22.8b, #4 \n" /* R */ \ 394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "bic v23.8b, v23.8b, v4.8b \n" /* A */ \ 395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \ 396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ 397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ 398ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 399ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToARGB4444Row_NEON(const uint8* src_y, 400ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 401ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 402ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb4444, 4037bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 404ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 405ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 4067bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.16b, #0x0f \n" // bits to clear with vbic. 
408ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 409ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV422 4107bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" 412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v23.8b, #255 \n" 413ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTOARGB4444 414ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. 416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 417ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 418ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 419ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 420ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb4444), // %3 421ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 4227bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 4237bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 4247bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 4257bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 428ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 
429ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 430ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid I400ToARGBRow_NEON(const uint8* src_y, 432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8* dst_argb, 433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int width) { 434ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 4357bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 4367bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" 437ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 438ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUV400 4397bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 440da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" 441ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 442da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 443da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 444ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 445ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 4467bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 4477bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), 4487bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&kYuvI601Constants.kUVToG), 4497bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), 4507bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) 451da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", 
"v4", "v5", "v6", "v7", "v20", 452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 453ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 454ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 455ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 456da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid J400ToARGBRow_NEON(const uint8* src_y, 457ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 458ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 459ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 460da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v23.8b, #255 \n" 461ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 462ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 463da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v20.8b}, [%0], #8 \n" 464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v21.8b, v20.8b, v20.8b \n" 465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v22.8b, v20.8b, v20.8b \n" 466da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" 467ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 470ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 471ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 472ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %2 
473ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v20", "v21", "v22", "v23" 475ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 476ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 477ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 478ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid NV12ToARGBRow_NEON(const uint8* src_y, 479ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_uv, 480ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 4817bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 482ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 483ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 4847bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 4857bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" 486ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 487ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READNV12 4887bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 489da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" 490ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 491da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 493ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 494ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_uv), // %1 495ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 
496ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 4977bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 4987bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 4997bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 5007bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 501da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 502da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 503ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 504ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 505ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 506ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid NV21ToARGBRow_NEON(const uint8* src_y, 5077bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8* src_vu, 508ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 5097bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 510ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 511ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 5127bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 5137bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" 514ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 515ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READNV21 5167bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 517da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" 518ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanian MEMACCESS(2) 519da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 520da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 521ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 5227bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(src_vu), // %1 523ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 524ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 5257bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 5267bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 5277bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 5287bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 531ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 532ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 533ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 534ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid NV12ToRGB565Row_NEON(const uint8* src_y, 535ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_uv, 536ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_rgb565, 5377bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 538ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 539ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 
5407bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 541ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 542ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READNV12 5437bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 544da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" 545ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTORGB565 546ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 547da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. 548da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 549ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 550ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_uv), // %1 551ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_rgb565), // %2 552ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 5537bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 5547bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 5557bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 5567bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 559ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 560ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 561ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 
562ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid YUY2ToARGBRow_NEON(const uint8* src_yuy2, 563ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 5647bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 565ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 566ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 5677bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 5687bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" 569ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 570ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READYUY2 5717bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 572da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" 573ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 574da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 575da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 576ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_yuy2), // %0 577ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 5787bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 5797bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 5807bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 5817bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 5827bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 583da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 
584da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 585ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 586ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 587ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 588ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid UYVYToARGBRow_NEON(const uint8* src_uyvy, 589ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, 5907bc9febe8749e98a3812a0dc4380ceae75c29450Johann const struct YuvConstants* yuvconstants, 591ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 592ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 5937bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB_SETUP 5947bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8b, #255 \n" 595ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 596ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian READUYVY 5977bc9febe8749e98a3812a0dc4380ceae75c29450Johann YUVTORGB(v22, v21, v20) 598da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" 599ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 600da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" 601da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 602ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_uyvy), // %0 603ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 6047bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 6057bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [kUVToRB]"r"(&yuvconstants->kUVToRB), 6067bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVToG]"r"(&yuvconstants->kUVToG), 
6077bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 6087bc9febe8749e98a3812a0dc4380ceae75c29450Johann [kYToRgb]"r"(&yuvconstants->kYToRgb) 609da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 610da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 611ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 612ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 613ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 614ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. 615ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 616ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 617ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 618ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 619ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 620da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV 621da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" // 16 processed per loop 622ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 623da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%1], #16 \n" // store U 624ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 625da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.16b}, [%2], #16 \n" // store V 626da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b 
\n" 627ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_uv), // %0 628ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %1 629ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %2 630ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 // Output registers 631ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : // Input registers 632da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1" // Clobber List 633ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 634ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 635ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 636ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Reads 16 U's and V's and writes out 16 pairs of UV. 637ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 638ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 639ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 640ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 641ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 642da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load U 643ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 644da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.16b}, [%1], #16 \n" // load V 645da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" // 16 processed per loop 646ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 647da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st2 {v0.16b,v1.16b}, 
[%2], #32 \n" // store 16 pairs of UV 648da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 649ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 650ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %0 651ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %1 652ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_uv), // %2 653ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 // Output registers 654ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : // Input registers 655da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1" // Clobber List 656ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 657ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 658ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 659ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. 
660ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid CopyRow_NEON(const uint8* src, uint8* dst, int count) { 661ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 662ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 663ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 664da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 32 665da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #32 \n" // 32 processed per loop 666ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 667da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 32 668da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 669ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src), // %0 670ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst), // %1 671ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(count) // %2 // Output registers 672ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : // Input registers 673da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 674ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 675ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 676ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 677da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian// SetRow writes 'count' bytes using an 8 bit value repeated. 
678da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid SetRow_NEON(uint8* dst, uint8 v8, int count) { 679ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 680da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v0.16b, %w2 \n" // duplicate 16 bytes 681da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 6827bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w1, %w1, #16 \n" // 16 bytes per loop 683ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 684da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%0], #16 \n" // store 6857bc9febe8749e98a3812a0dc4380ceae75c29450Johann "b.gt 1b \n" 686ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(dst), // %0 687ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(count) // %1 688da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"(v8) // %2 689da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0" 690ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 691ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 692ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 693da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { 694da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian asm volatile ( 695da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v0.4s, %w2 \n" // duplicate 4 ints 696da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 6977bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w1, %w1, #4 \n" // 4 ints per loop 698da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 699da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%0], #16 \n" // store 
7007bc9febe8749e98a3812a0dc4380ceae75c29450Johann "b.gt 1b \n" 701da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "+r"(dst), // %0 702da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(count) // %1 703da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"(v32) // %2 704da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0" 705da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ); 706ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 707ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 708ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) { 709ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 710ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Start at end of source row. 7117bc9febe8749e98a3812a0dc4380ceae75c29450Johann "add %0, %0, %w2, sxtw \n" 712da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sub %0, %0, #16 \n" 713ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 714ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 715da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 7167bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w2, %w2, #16 \n" // 16 pixels per loop. 
717da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rev64 v0.16b, v0.16b \n" 718ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 719da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 720ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 721da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.D}[0], [%1], #8 \n" 722da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 723ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src), // %0 724ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst), // %1 7257bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 726da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"((ptrdiff_t)-16) // %3 727da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0" 728ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 729ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 730ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 731ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 732ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int width) { 733ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 734ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Start at end of source row. 
7357bc9febe8749e98a3812a0dc4380ceae75c29450Johann "add %0, %0, %w3, sxtw #1 \n" 736da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sub %0, %0, #16 \n" 737ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 738ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 739da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 7407bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w3, %w3, #8 \n" // 8 pixels per loop. 741da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rev64 v0.8b, v0.8b \n" 742da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rev64 v1.8b, v1.8b \n" 743ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 744da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // dst += 8 745ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 746da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%2], #8 \n" 747da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 748ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_uv), // %0 749ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %1 750ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %2 7517bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %3 752da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"((ptrdiff_t)-16) // %4 753da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1" 754ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 755ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 756ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 757ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanianvoid ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { 758ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 7597bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Start at end of source row. 7607bc9febe8749e98a3812a0dc4380ceae75c29450Johann "add %0, %0, %w2, sxtw #2 \n" 761da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sub %0, %0, #16 \n" 762ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 763ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 764da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 7657bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w2, %w2, #4 \n" // 4 pixels per loop. 766da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rev64 v0.4s, v0.4s \n" 767ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 768da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 769ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 770da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.D}[0], [%1], #8 \n" 771da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 772ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src), // %0 773ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst), // %1 7747bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 775da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"((ptrdiff_t)-16) // %3 776da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0" 777ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 778ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 779ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 
7807bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { 781ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 782da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #255 \n" // Alpha 783ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 784ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 785da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24. 786da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 787ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 788da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB pixels 789da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 790ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgb24), // %0 791ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 7927bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 793ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 794da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List 795ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 796ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 797ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 7987bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { 799ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 800da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #255 
\n" // Alpha 801ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 802ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 803da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b 804da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 805da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v3.8b, v1.8b, v1.8b \n" // move g 806da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v4.8b, v0.8b, v0.8b \n" // move r 807ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 808da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a 809da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 810ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_raw), // %0 811ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 8127bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 813ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 814da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List 815ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 816ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 8177bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8187bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { 8197bc9febe8749e98a3812a0dc4380ceae75c29450Johann asm volatile ( 8207bc9febe8749e98a3812a0dc4380ceae75c29450Johann "1: \n" 8217bc9febe8749e98a3812a0dc4380ceae75c29450Johann MEMACCESS(0) 8227bc9febe8749e98a3812a0dc4380ceae75c29450Johann "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // 
read r g b 8237bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w2, %w2, #8 \n" // 8 processed per loop. 8247bc9febe8749e98a3812a0dc4380ceae75c29450Johann "orr v3.8b, v1.8b, v1.8b \n" // move g 8257bc9febe8749e98a3812a0dc4380ceae75c29450Johann "orr v4.8b, v0.8b, v0.8b \n" // move r 8267bc9febe8749e98a3812a0dc4380ceae75c29450Johann MEMACCESS(1) 8277bc9febe8749e98a3812a0dc4380ceae75c29450Johann "st3 {v2.8b,v3.8b,v4.8b}, [%1], #24 \n" // store b g r 8287bc9febe8749e98a3812a0dc4380ceae75c29450Johann "b.gt 1b \n" 8297bc9febe8749e98a3812a0dc4380ceae75c29450Johann : "+r"(src_raw), // %0 8307bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(dst_rgb24), // %1 8317bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 8327bc9febe8749e98a3812a0dc4380ceae75c29450Johann : 8337bc9febe8749e98a3812a0dc4380ceae75c29450Johann : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List 8347bc9febe8749e98a3812a0dc4380ceae75c29450Johann ); 8357bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 836ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 837ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define RGB565TOARGB \ 838da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \ 839da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \ 840da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \ 841da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v1.8b, v4.8b, v6.8b \n" /* G */ \ 842da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ 843da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \ 844da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \ 
845da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \ 846da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \ 847da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \ 848da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v2.2D, v0.D[1] \n" /* R */ 849ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 8507bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width) { 851ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 852da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v3.8b, #255 \n" // Alpha 853ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 854ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 855da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. 856da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
857ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB565TOARGB 858ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 859da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels 860da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 861ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgb565), // %0 862ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 8637bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 864ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 865da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List 866ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 867ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 868ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 869ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define ARGB1555TOARGB \ 870da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ 871da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ 872da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \ 873da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 874da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \ 875da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn2 v3.16b, v2.8h \n" \ 876da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 877da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ 
878da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ 879da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 880da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \ 881da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ 882da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ 883da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 884da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ 885da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \ 886da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v1.2D, v0.D[1] \n" \ 887da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v3.2D, v2.D[1] \n" 888ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 889ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 
890ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define RGB555TOARGB \ 891da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ 892da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ 893da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \ 894da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 895da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ 896da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ 897da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 898da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \ 899da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ 900da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ 901da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian \ 902da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ 903da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v2.16b, v1.16b, v3.16b \n" /* R */ \ 904da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v1.2D, v0.D[1] \n" /* G */ \ 905ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 906ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, 9077bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 908ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 
909da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v3.8b, #255 \n" // Alpha 910ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 911ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 912da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. 913da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 914ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB1555TOARGB 915ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 916da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels 917da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 918ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb1555), // %0 919ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 9207bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 921ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 922da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 923ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 924ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 925ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 926ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#define ARGB4444TOARGB \ 927da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \ 928da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \ 929da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 
*/ \ 930da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \ 931da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \ 932da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \ 933da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \ 934da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \ 935da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v0.2D, v2.D[1] \n" \ 936da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v1.2D, v3.D[1] \n" 937ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 938ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, 9397bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 940ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 941ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 942ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 943da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. 944da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
945ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB4444TOARGB 946ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 947da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels 948da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 949ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb4444), // %0 950ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 9517bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 952ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 953da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List 954ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 955ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 956ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9577bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int width) { 958ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 959ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 960ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 961da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB pixels 962da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 963ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 964da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of RGB24. 
965da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 966ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 967ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_rgb24), // %1 9687bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 969ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 970da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List 971ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 972ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 973ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9747bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int width) { 975ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 976ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 977ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 978da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a 979da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
980da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v4.8b, v2.8b, v2.8b \n" // mov g 981da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v5.8b, v1.8b, v1.8b \n" // mov b 982ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 983da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b 984da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 985ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 986ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_raw), // %1 9877bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 988ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 989da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List 990ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 991ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 992ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9937bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width) { 994ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 995ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 996ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 997da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2. 998da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #16 \n" // 16 processed per loop. 
999ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1000da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y. 1001da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1002ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_yuy2), // %0 1003ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 10047bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1005ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1006da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1" // Clobber List 1007ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1008ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1009ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 10107bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width) { 1011ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1012ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1013ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1014da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY. 1015da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #16 \n" // 16 processed per loop. 1016ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1017da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y. 
1018da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1019ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_uyvy), // %0 1020ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 10217bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1022ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1023da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1" // Clobber List 1024ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1025ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1026ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1027ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, 10287bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 1029ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1030ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1031ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1032da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2 pixels 1033da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. 1034ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1035da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%1], #8 \n" // store 8 U. 1036ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1037da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v3.8b}, [%2], #8 \n" // store 8 V. 
1038da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1039ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_yuy2), // %0 1040ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %1 1041ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %2 10427bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %3 1043ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1044da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 1045ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1046ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1047ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1048ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, 10497bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 1050ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1051ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1052ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1053da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY pixels 1054da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. 1055ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1056da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 U. 1057ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1058da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v2.8b}, [%2], #8 \n" // store 8 V. 
1059da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1060ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_uyvy), // %0 1061ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %1 1062ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %2 10637bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %3 1064ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1065da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 1066ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1067ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1068ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1069ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, 10707bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1071da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_yuy2b = src_yuy2 + stride_yuy2; 1072ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1073ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1074ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1075da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels 1076da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. 
1077ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1078da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row 1079da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U 1080da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V 1081ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1082da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%2], #8 \n" // store 8 U. 1083ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1084da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v3.8b}, [%3], #8 \n" // store 8 V. 1085da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1086ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_yuy2), // %0 1087da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_yuy2b), // %1 1088ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1089ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 10907bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1091ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1092da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", 1093da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v5", "v6", "v7" // Clobber List 1094ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1095ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1096ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1097ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid UYVYToUVRow_NEON(const uint8* 
src_uyvy, int stride_uyvy, 10987bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1099da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_uyvyb = src_uyvy + stride_uyvy; 1100ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1101ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1102ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels 1104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. 1105ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row 1107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U 1108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V 1109ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 U. 1111ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v2.8b}, [%3], #8 \n" // store 8 V. 
1113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1114ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_uyvy), // %0 1115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_uyvyb), // %1 1116ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1117ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 11187bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1119ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", 1121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v5", "v6", "v7" // Clobber List 1122ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1123ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1124ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1125ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 1126ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, 11277bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8* shuffler, int width) { 1128ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1129ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.16b}, [%3] \n" // shuffler 1131ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1132ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels. 
1134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #4 \n" // 4 processed per loop 1135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels 1136ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.16b}, [%1], #16 \n" // store 4. 1138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1139ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1140ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 11417bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1142ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "r"(shuffler) // %3 1143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2" // Clobber List 1144ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1145ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1146ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1147ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToYUY2Row_NEON(const uint8* src_y, 1148ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 1149ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 1150ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_yuy2, int width) { 1151ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1152ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1153ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys 
1155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v2.8b, v1.8b, v1.8b \n" 1156ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us 1158ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs 1160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 pixels 1161ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. 1163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1164ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 1165ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 1166ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 1167ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_yuy2), // %3 1168ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 1169ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" 1171ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1172ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1173ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1174ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid I422ToUYVYRow_NEON(const uint8* src_y, 1175ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_u, 
1176ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_v, 1177ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_uyvy, int width) { 1178ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1179ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1180ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys 1182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v3.8b, v2.8b, v2.8b \n" 1183ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us 1185ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs 1187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 pixels 1188ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. 
1190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1191ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y), // %0 1192ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_u), // %1 1193ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_v), // %2 1194ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_uyvy), // %3 1195ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 1196ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" 1198ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1199ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1200ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 12017bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int width) { 1202ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1203ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1204ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels 1206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 1207ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTORGB565 1208ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565. 
1210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1211ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1212ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_rgb565), // %1 12137bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1214ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v20", "v21", "v22", "v23" 1216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ); 1217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 1218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, 1220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint32 dither4, int width) { 1221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian asm volatile ( 1222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v1.4s, %w2 \n" // dither4 1223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 1224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 1225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels 1226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
1227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v20.8b, v20.8b, v1.8b \n" 1228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v21.8b, v21.8b, v1.8b \n" 1229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v22.8b, v22.8b, v1.8b \n" 1230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ARGBTORGB565 1231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 1232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565. 1233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "+r"(dst_rgb) // %0 1235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"(src_argb), // %1 1236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "r"(dither4), // %2 1237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "r"(width) // %3 1238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23" 1239ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1240ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1241ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1242ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, 12437bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 1244ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1245ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1246ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels 
1248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 1249ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTOARGB1555 1250ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB1555. 1252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1253ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1254ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb1555), // %1 12557bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1256ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v20", "v21", "v22", "v23" 1258ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1259ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1260ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1261ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, 12627bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 1263ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.16b, #0x0f \n" // bits to clear with vbic. 
1265ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1266ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels 1268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 1269ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGBTOARGB4444 1270ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1271da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels ARGB4444. 1272da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1273ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1274ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb4444), // %1 12757bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1276ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23" 1278ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1279ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1280ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 12817bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { 1282ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #13 \n" // B * 0.1016 coefficient 1284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 1285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian "movi v6.8b, #33 \n" // R * 0.2578 coefficient 1286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 1287ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1288ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 1290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 1291da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v0.8b, v4.8b \n" // B 1292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v1.8b, v5.8b \n" // G 1293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v2.8b, v6.8b \n" // R 1294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y 1295da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 1296ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
1298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1299ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1300ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 13017bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1302ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 1304ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1305ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1306ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 13077bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width) { 13087bc9febe8749e98a3812a0dc4380ceae75c29450Johann asm volatile ( 13097bc9febe8749e98a3812a0dc4380ceae75c29450Johann "1: \n" 13107bc9febe8749e98a3812a0dc4380ceae75c29450Johann MEMACCESS(0) 13117bc9febe8749e98a3812a0dc4380ceae75c29450Johann "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 pixels 13127bc9febe8749e98a3812a0dc4380ceae75c29450Johann "subs %w2, %w2, #16 \n" // 16 processed per loop 13137bc9febe8749e98a3812a0dc4380ceae75c29450Johann MEMACCESS(1) 13147bc9febe8749e98a3812a0dc4380ceae75c29450Johann "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. 
13157bc9febe8749e98a3812a0dc4380ceae75c29450Johann "b.gt 1b \n" 13167bc9febe8749e98a3812a0dc4380ceae75c29450Johann : "+r"(src_argb), // %0 13177bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(dst_a), // %1 13187bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 13197bc9febe8749e98a3812a0dc4380ceae75c29450Johann : 13207bc9febe8749e98a3812a0dc4380ceae75c29450Johann : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 13217bc9febe8749e98a3812a0dc4380ceae75c29450Johann ); 13227bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 13237bc9febe8749e98a3812a0dc4380ceae75c29450Johann 13247bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width) { 1325ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1326da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #15 \n" // B * 0.11400 coefficient 1327da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #75 \n" // G * 0.58700 coefficient 1328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #38 \n" // R * 0.29900 coefficient 1329ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1330ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1331da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 1332da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
1333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v0.8b, v4.8b \n" // B 1334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v1.8b, v5.8b \n" // G 1335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v2.8b, v6.8b \n" // R 1336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y 1337ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 1339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1340ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1341ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 13427bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1343ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" 1345ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1346ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1347ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1348ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// 8x1 pixels. 
1349ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 13507bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 1351ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v24.8b, #112 \n" // UB / VR 0.875 coefficient 1353da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v25.8b, #74 \n" // UG -0.5781 coefficient 1354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v26.8b, #38 \n" // UR -0.2969 coefficient 1355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v27.8b, #18 \n" // VB -0.1406 coefficient 1356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v28.8b, #94 \n" // VG -0.7344 coefficient 1357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v29.16b,#0x80 \n" // 128.5 1358ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1359ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 1361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
1362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v4.8h, v0.8b, v24.8b \n" // B 1363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlsl v4.8h, v1.8b, v25.8b \n" // G 1364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlsl v4.8h, v2.8b, v26.8b \n" // R 1365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v4.8h, v4.8h, v29.8h \n" // +128 -> unsigned 1366ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1367da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v2.8b, v24.8b \n" // R 1368da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlsl v3.8h, v1.8b, v28.8b \n" // G 1369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlsl v3.8h, v0.8b, v27.8b \n" // B 1370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v3.8h, v3.8h, v29.8h \n" // +128 -> unsigned 1371ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1372da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v0.8b, v4.8h, #8 \n" // 16 bit to 8 bit U 1373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V 1374ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1375ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1376da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. 1377ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1378da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 
1379da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1380ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1381ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %1 1382ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %2 13837bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %3 1384ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", 1386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v24", "v25", "v26", "v27", "v28", "v29" 1387ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1388ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1389ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 13907bc9febe8749e98a3812a0dc4380ceae75c29450Johann#define RGBTOUV_SETUP_REG \ 13917bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ 13927bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ 13937bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ 13947bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ 13957bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ 13967bc9febe8749e98a3812a0dc4380ceae75c29450Johann "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ 1397ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 13987bc9febe8749e98a3812a0dc4380ceae75c29450Johann// 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. 
1399ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 14007bc9febe8749e98a3812a0dc4380ceae75c29450Johann int width) { 1401ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1403da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 1404da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 1405da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1406da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. 1407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 1408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. 1409da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 1410da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16. 1411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts. 1412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts. 1413da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts. 1414da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "addp v0.8h, v0.8h, v4.8h \n" // B 16 shorts -> 8 shorts. 1416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "addp v1.8h, v1.8h, v5.8h \n" // G 16 shorts -> 8 shorts. 
1417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "addp v2.8h, v2.8h, v6.8h \n" // R 16 shorts -> 8 shorts. 1418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1419da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" // 2x average 1420da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1421da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1422da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1423da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #32 \n" // 32 processed per loop. 1424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v3.8h, v0.8h, v20.8h \n" // B 1425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v3.8h, v1.8h, v21.8h \n" // G 1426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v3.8h, v2.8h, v22.8h \n" // R 1427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned 1428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v4.8h, v2.8h, v20.8h \n" // R 1429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v4.8h, v1.8h, v24.8h \n" // G 1430da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v4.8h, v0.8h, v23.8h \n" // B 1431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned 1432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U 1433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V 1434da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 1435da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], 
#8 \n" // store 8 pixels U. 1436ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 1438da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1439ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1440ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %1 1441ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %2 14427bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %3 1443ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1444da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1445da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1446ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1447ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1448ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 14497bc9febe8749e98a3812a0dc4380ceae75c29450Johann// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
// Emits the U/V arithmetic shared by the 2-row subsampling functions.
// SUM_B, SUM_G and SUM_R name the 8x16-bit registers holding the per-channel
// inputs. Reads coefficients from v20-v24 and the bias from v25, uses v3 and
// v4 as scratch, and leaves 8 U bytes in v0.8b and 8 V bytes in v1.8b.
// The inputs are only read before v0/v1 are written, so they may alias them.
#define RGBTOUV(SUM_B, SUM_G, SUM_R)                                           \
  "mul     v3.8h, " #SUM_B ",v20.8h       \n"  /* U: + B term              */ \
  "mul     v4.8h, " #SUM_R ",v20.8h       \n"  /* V: + R term              */ \
  "mls     v3.8h, " #SUM_G ",v21.8h       \n"  /* U: - G term              */ \
  "mls     v4.8h, " #SUM_G ",v24.8h       \n"  /* V: - G term              */ \
  "mls     v3.8h, " #SUM_R ",v22.8h       \n"  /* U: - R term              */ \
  "mls     v4.8h, " #SUM_B ",v23.8h       \n"  /* V: - B term              */ \
  "add     v3.8h, v3.8h, v25.8h           \n"  /* U: add bias              */ \
  "add     v4.8h, v4.8h, v25.8h           \n"  /* V: add bias              */ \
  "uqshrn  v0.8b, v3.8h, #8               \n"  /* U: narrow 16 -> 8 bit    */ \
  "uqshrn  v1.8b, v4.8h, #8               \n"  /* V: narrow 16 -> 8 bit    */

// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
// TODO(fbarchard): consider ptrdiff_t for all strides.
1464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1465ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, 14667bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1467da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_argb_1 = src_argb + src_stride_argb; 1468ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1470ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1471ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1472da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1473da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. 1474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 1475da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. 1476da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1477ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1478da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 1479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. 1480da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. 1481da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. 
1482ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1483da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" // 2x average 1484da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1485da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1486ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1487da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1488da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v0.8h, v1.8h, v2.8h) 1489ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1490da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 1491ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
1493da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1494ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1495da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_argb_1), // %1 1496ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1497ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 14987bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1499ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1500da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1501da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1502ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1503ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1504ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1505ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// TODO(fbarchard): Subsample match C code. 
1506ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, 15077bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1508da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_argb_1 = src_argb + src_stride_argb; 1509ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1510da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2 1511da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2 1512da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2 1513da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2 1514da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2 1515da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit) 1516ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1517ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1518da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1519da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. 1520da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 1521da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. 
1522ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1523da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 1524da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. 1525da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. 1526da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. 1527ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1528da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" // 2x average 1529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1531ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1532da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1533da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v0.8h, v1.8h, v2.8h) 1534ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1535da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 1536ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1537da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
1538da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1539ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 1540da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_argb_1), // %1 1541ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1542ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 15437bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1544ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1545da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1546da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1547ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1548ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1549ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1550ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, 15517bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1552da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_bgra_1 = src_bgra + src_stride_bgra; 1553ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1554da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1555ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1556ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 
1558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts. 1559da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts. 1560da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts. 1561ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1562da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more 1563da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts. 1564da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts. 1565da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts. 1566ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1567da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" // 2x average 1568da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v3.8h, #1 \n" 1569da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1570ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1571da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1572da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v0.8h, v1.8h, v2.8h) 1573ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1574da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 
1575ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1576da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1577da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1578ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_bgra), // %0 1579da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_bgra_1), // %1 1580ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1581ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 15827bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1583ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1584da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1585da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1586ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1587ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1588ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1589ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, 15907bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1591da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_abgr_1 = src_abgr + src_stride_abgr; 1592ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1593da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1594ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1595ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 
1596da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1597da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts. 1598da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 1599da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts. 1600ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1601da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. 1602da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts. 1603da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts. 1604da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts. 1605ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1606da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v3.8h, #1 \n" // 2x average 1607da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1608da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1609ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1610da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1611da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v0.8h, v2.8h, v1.8h) 1612ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1613da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 
1614ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1615da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1616da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1617ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_abgr), // %0 1618da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_abgr_1), // %1 1619ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1620ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 16217bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1622ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1623da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1624da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1625ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1626ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1627ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1628ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, 16297bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1630da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_rgba_1 = src_rgba + src_stride_rgba; 1631ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1632da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1633ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1634ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 
1635da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1636da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts. 1637da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts. 1638da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts. 1639ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1640da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. 1641da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts. 1642da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts. 1643da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts. 1644ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1645da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" // 2x average 1646da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1647da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1648ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1649da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1650da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v0.8h, v1.8h, v2.8h) 1651ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1652da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 
1653ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1654da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1655da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1656ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgba), // %0 1657da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_rgba_1), // %1 1658ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1659ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 16607bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1661ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1662da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1663da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1664ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1665ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1666ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1667ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, 16687bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1669da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_rgb24_1 = src_rgb24 + src_stride_rgb24; 1670ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1671da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1672ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1673ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 
1674da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels. 1675da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. 1676da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 1677da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. 1678ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1679da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 more. 1680da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. 1681da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. 1682da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. 1683ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1684da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" // 2x average 1685da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1686da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" 1687ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1688da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1689da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v0.8h, v1.8h, v2.8h) 1690ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1691da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 
1692ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1693da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1694da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1695ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgb24), // %0 1696da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_rgb24_1), // %1 1697ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1698ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 16997bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1700ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1701da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1702da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1703ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1704ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1705ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1706ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, 17077bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1708da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_raw_1 = src_raw + src_stride_raw; 1709ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1710da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1711ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1712ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 
1713da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 8 RAW pixels. 1714da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts. 1715da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. 1716da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts. 1717ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1718da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 8 more RAW pixels 1719da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts. 1720da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. 1721da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts. 1722ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1723da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v2.8h, v2.8h, #1 \n" // 2x average 1724da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v1.8h, v1.8h, #1 \n" 1725da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v0.8h, v0.8h, #1 \n" 1726ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1727da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 32 processed per loop. 1728da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV(v2.8h, v1.8h, v0.8h) 1729ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1730da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 
1731ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1732da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1733da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1734ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_raw), // %0 1735da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_raw_1), // %1 1736ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1737ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 17387bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1739ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1740da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1741da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v23", "v24", "v25" 1742ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1743ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1744ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 17457bc9febe8749e98a3812a0dc4380ceae75c29450Johann// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
1746ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, 17477bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1748da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_rgb565_1 = src_rgb565 + src_stride_rgb565; 1749ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1750da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / 2 1751da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2 1752da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2 1753da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2 1754da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2 1755da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v27.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit) 1756ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1757ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1758da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. 1759ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB565TOARGB 1760da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1761da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1762da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 
1763ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1764da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels. 1765ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB565TOARGB 1766da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1767da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1768da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1769ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1770ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1771da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels. 1772ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB565TOARGB 1773da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1774da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1775da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1776ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1777da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels. 1778ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB565TOARGB 1779da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1780da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 
1781da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1782da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1783da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v16.D[1], v17.D[0] \n" 1784da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v18.D[1], v19.D[0] \n" 1785da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v20.D[1], v21.D[0] \n" 1786da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1787da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v4.8h, v16.8h, #1 \n" // 2x average 1788da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v5.8h, v18.8h, #1 \n" 1789da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v6.8h, v20.8h, #1 \n" 1790da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1791da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 processed per loop. 
1792da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v16.8h, v4.8h, v22.8h \n" // B 1793da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v16.8h, v5.8h, v23.8h \n" // G 1794da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v16.8h, v6.8h, v24.8h \n" // R 1795da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned 1796da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v17.8h, v6.8h, v22.8h \n" // R 1797da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v17.8h, v5.8h, v26.8h \n" // G 1798da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v17.8h, v4.8h, v25.8h \n" // B 1799da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned 1800da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U 1801da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V 1802ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1803da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 1804ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1805da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
1806da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1807ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgb565), // %0 1808da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_rgb565_1), // %1 1809ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1810ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 18117bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1812ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1813da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 1814da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", 1815da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v25", "v26", "v27" 1816ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1817ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1818ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 18197bc9febe8749e98a3812a0dc4380ceae75c29450Johann// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
1820ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, 18217bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1822da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_argb1555_1 = src_argb1555 + src_stride_argb1555; 1823ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1824da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1825ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1826ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1827da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. 1828ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB555TOARGB 1829da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1830da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1831da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1832ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1833da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels. 1834ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB555TOARGB 1835da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1836da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1837da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 
1838ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1839ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1840da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels. 1841ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB555TOARGB 1842da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1843da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1844da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1845ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1846da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels. 1847ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB555TOARGB 1848da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1849da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1850da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 
1851da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1852da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v16.D[1], v26.D[0] \n" 1853da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v17.D[1], v27.D[0] \n" 1854da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v18.D[1], v28.D[0] \n" 1855da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1856da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v4.8h, v16.8h, #1 \n" // 2x average 1857da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v5.8h, v17.8h, #1 \n" 1858da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v6.8h, v18.8h, #1 \n" 1859da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1860da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 processed per loop. 1861da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v2.8h, v4.8h, v20.8h \n" // B 1862da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v2.8h, v5.8h, v21.8h \n" // G 1863da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v2.8h, v6.8h, v22.8h \n" // R 1864da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned 1865da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v3.8h, v6.8h, v20.8h \n" // R 1866da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v3.8h, v5.8h, v24.8h \n" // G 1867da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v3.8h, v4.8h, v23.8h \n" // B 1868da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned 1869da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U 1870da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V 1871ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1872da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 1873ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1874da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1875da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1876ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb1555), // %0 1877da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_argb1555_1), // %1 1878ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1879ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 18807bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1881ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1882da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", 1883da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", 1884da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v26", "v27", "v28" 1885ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1886ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1887ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 18887bc9febe8749e98a3812a0dc4380ceae75c29450Johann// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
1889ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, 18907bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8* dst_u, uint8* dst_v, int width) { 1891da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_argb4444_1 = src_argb4444 + src_stride_argb4444; 1892ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1893da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian RGBTOUV_SETUP_REG 1894ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1895ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1896da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. 1897ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB4444TOARGB 1898da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1899da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1900da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1901ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1902da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels. 1903ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB4444TOARGB 1904da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1905da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1906da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 
1907ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1908ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1909da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels. 1910ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB4444TOARGB 1911da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1912da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1913da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 1914ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 1915da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels. 1916ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB4444TOARGB 1917da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 1918da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. 1919da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. 
1920da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1921da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v16.D[1], v26.D[0] \n" 1922da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v17.D[1], v27.D[0] \n" 1923da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ins v18.D[1], v28.D[0] \n" 1924da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1925da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v4.8h, v16.8h, #1 \n" // 2x average 1926da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v5.8h, v17.8h, #1 \n" 1927da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urshr v6.8h, v18.8h, #1 \n" 1928da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1929da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #16 \n" // 16 processed per loop. 1930da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v2.8h, v4.8h, v20.8h \n" // B 1931da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v2.8h, v5.8h, v21.8h \n" // G 1932da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v2.8h, v6.8h, v22.8h \n" // R 1933da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned 1934da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v3.8h, v6.8h, v20.8h \n" // R 1935da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v3.8h, v5.8h, v24.8h \n" // G 1936da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mls v3.8h, v4.8h, v23.8h \n" // B 1937da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned 1938da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U 1939da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V 1940ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 1941da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. 1942ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 1943da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 1944da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1945ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb4444), // %0 1946da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_argb4444_1), // %1 1947ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_u), // %2 1948ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_v), // %3 19497bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %4 1950ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1951da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", 1952da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", 1953da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v26", "v27", "v28" 1954da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1955ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1956ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1957ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 19587bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width) { 1959ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1960da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian "movi v24.8b, #13 \n" // B * 0.1016 coefficient 1961da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v25.8b, #65 \n" // G * 0.5078 coefficient 1962da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v26.8b, #33 \n" // R * 0.2578 coefficient 1963da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v27.8b, #16 \n" // Add 16 constant 1964ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1965ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1966da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. 1967da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 1968ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian RGB565TOARGB 1969da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v0.8b, v24.8b \n" // B 1970da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v1.8b, v25.8b \n" // G 1971da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v2.8b, v26.8b \n" // R 1972da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y 1973da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v27.8b \n" 1974da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 1975da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
1976da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 1977ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgb565), // %0 1978ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 19797bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 1980ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 1981da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6", 1982da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v24", "v25", "v26", "v27" 1983ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 1984ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1985ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 19867bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width) { 1987ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 1988da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #13 \n" // B * 0.1016 coefficient 1989da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 1990da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #33 \n" // R * 0.2578 coefficient 1991da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 1992ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 1993ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 1994da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. 1995da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
1996ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB1555TOARGB 1997da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v0.8b, v4.8b \n" // B 1998da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v1.8b, v5.8b \n" // G 1999da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v2.8b, v6.8b \n" // R 2000da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y 2001da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 2002da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 2003da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 2004da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2005ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb1555), // %0 2006ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 20077bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2008ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2009da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 2010ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2011ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2012ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 20137bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width) { 2014ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2015da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v24.8b, #13 \n" // B * 0.1016 coefficient 2016da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian "movi v25.8b, #65 \n" // G * 0.5078 coefficient 2017da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v26.8b, #33 \n" // R * 0.2578 coefficient 2018da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v27.8b, #16 \n" // Add 16 constant 2019ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2020ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2021da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. 2022da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 2023ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ARGB4444TOARGB 2024da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v0.8b, v24.8b \n" // B 2025da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v1.8b, v25.8b \n" // G 2026da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v3.8h, v2.8b, v26.8b \n" // R 2027da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y 2028da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v27.8b \n" 2029da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 2030da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
2031da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2032ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb4444), // %0 2033ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 20347bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2035ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2036da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27" 2037ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2038ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2039ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 20407bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width) { 2041ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2042da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #33 \n" // R * 0.2578 coefficient 2043da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 2044da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #13 \n" // B * 0.1016 coefficient 2045da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 2046ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2047ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2048da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. 2049da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
2050da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v1.8b, v4.8b \n" // R 2051da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v2.8b, v5.8b \n" // G 2052da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v3.8b, v6.8b \n" // B 2053da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y 2054da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 2055ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2056da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 2057da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2058ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_bgra), // %0 2059ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %1 20607bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2061ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2062da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" 2063ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2064ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2065ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 20667bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width) { 2067ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2068da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #33 \n" // R * 0.2578 coefficient 2069da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 
2070da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #13 \n" // B * 0.1016 coefficient 2071da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 2072ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2073ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2074da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. 2075da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 2076da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v0.8b, v4.8b \n" // R 2077da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v1.8b, v5.8b \n" // G 2078da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v2.8b, v6.8b \n" // B 2079da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y 2080da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 2081ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2082da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
2083da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2084ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_abgr), // %0 2085da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(dst_y), // %1 20867bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2087ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2088da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" 2089ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2090ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2091ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 20927bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width) { 2093ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2094da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #13 \n" // B * 0.1016 coefficient 2095da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 2096da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #33 \n" // R * 0.2578 coefficient 2097da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 2098ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2099ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. 2101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
2102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v1.8b, v4.8b \n" // B 2103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v2.8b, v5.8b \n" // G 2104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v3.8b, v6.8b \n" // R 2105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y 2106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 2107ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 2109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2110ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgba), // %0 2111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(dst_y), // %1 21127bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2113ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" 2115ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2116ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2117ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 21187bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width) { 2119ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #13 \n" // B * 0.1016 coefficient 2121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 
2122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #33 \n" // R * 0.2578 coefficient 2123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 2124ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2125ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. 2127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 2128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v0.8b, v4.8b \n" // B 2129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v1.8b, v5.8b \n" // G 2130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v2.8b, v6.8b \n" // R 2131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y 2132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 2133ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
2135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2136ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_rgb24), // %0 2137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(dst_y), // %1 21387bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2139ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" 2141ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2142ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2143ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 21447bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) { 2145ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v4.8b, #33 \n" // R * 0.2578 coefficient 2147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v5.8b, #65 \n" // G * 0.5078 coefficient 2148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v6.8b, #13 \n" // B * 0.1016 coefficient 2149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v7.8b, #16 \n" // Add 16 constant 2150ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2151ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. 2153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
2154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v0.8b, v4.8b \n" // B 2155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v1.8b, v5.8b \n" // G 2156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v16.8h, v2.8b, v6.8b \n" // R 2157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y 2158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v7.8b \n" 2159ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 2161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2162ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_raw), // %0 2163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(dst_y), // %1 21647bc9febe8749e98a3812a0dc4380ceae75c29450Johann "+r"(width) // %2 2165ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" 2167ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2168ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2169ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2170ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Bilinear filter 16x2 -> 16x1 2171ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid InterpolateRow_NEON(uint8* dst_ptr, 2172ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_ptr, ptrdiff_t src_stride, 2173ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int dst_width, int source_y_fraction) { 
2174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int y1_fraction = source_y_fraction; 2175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int y0_fraction = 256 - y1_fraction; 2176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8* src_ptr1 = src_ptr + src_stride; 2177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian asm volatile ( 2178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "cmp %w4, #0 \n" 2179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.eq 100f \n" 2180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "cmp %w4, #128 \n" 2181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.eq 50f \n" 2182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 2183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v5.16b, %w4 \n" 2184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v4.16b, %w5 \n" 2185ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // General purpose row blend. 
2186ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2187ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" 2189ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.16b}, [%2], #16 \n" 2191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" 2192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v2.8h, v0.8b, v4.8b \n" 2193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull2 v3.8h, v0.16b, v4.16b \n" 2194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v2.8h, v1.8b, v5.8b \n" 2195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal2 v3.8h, v1.16b, v5.16b \n" 2196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn v0.8b, v2.8h, #8 \n" 2197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn2 v0.16b, v3.8h, #8 \n" 2198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%0], #16 \n" 2200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2201ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "b 99f \n" 2202ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2203ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Blend 50 / 50. 
2204ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "50: \n" 2205ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" 2207ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.16b}, [%2], #16 \n" 2209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" 2210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "urhadd v0.16b, v0.16b, v1.16b \n" 2211ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%0], #16 \n" 2213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 50b \n" 2214ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "b 99f \n" 2215ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2216ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Blend 100 / 0 - Copy row unchanged. 
2217ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "100: \n" 2218ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%1], #16 \n" 2220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" 2221ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%0], #16 \n" 2223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 100b \n" 2224ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2225ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "99: \n" 2226ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(dst_ptr), // %0 2227ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_ptr), // %1 2228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(src_ptr1), // %2 2229ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_width), // %3 2230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(y1_fraction), // %4 2231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "+r"(y0_fraction) // %5 2232ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v3", "v4", "v5" 2234ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2235ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2236ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2237ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr 2238ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBBlendRow_NEON(const uint8* src_argb0, 
const uint8* src_argb1, 2239ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, int width) { 2240ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" 2242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.lt 89f \n" 2243ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Blend 8 pixels. 2244ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "8: \n" 2245ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0 pixels 2247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 2248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1 pixels 2249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
2250da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v4.8b, v3.8b \n" // db * a 2251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v17.8h, v5.8b, v3.8b \n" // dg * a 2252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v18.8h, v6.8b, v3.8b \n" // dr * a 2253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 2254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 2255da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 2256da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) 2257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) 2258da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) 2259da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v4.8b \n" // + sb 2260da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v1.8b, v1.8b, v5.8b \n" // + sg 2261da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v2.8b, v2.8b, v6.8b \n" // + sr 2262da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v3.8b, #255 \n" // a = 255 2263ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels 2265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.ge 8b \n" 2266ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2267ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "89: \n" 
2268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "adds %w3, %w3, #8-1 \n" 2269da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.lt 99f \n" 2270ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2271ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Blend 1 pixels. 2272ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2273ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2274da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0. 2275da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 2276da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1. 2277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #1 \n" // 1 processed per loop. 2278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v16.8h, v4.8b, v3.8b \n" // db * a 2279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v17.8h, v5.8b, v3.8b \n" // dg * a 2280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v18.8h, v6.8b, v3.8b \n" // dr * a 2281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 2282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 2283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 2284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) 2285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) 2286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub 
v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) 2287da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v4.8b \n" // + sb 2288da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v1.8b, v1.8b, v5.8b \n" // + sg 2289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v2.8b, v2.8b, v6.8b \n" // + sr 2290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v3.8b, #255 \n" // a = 255 2291ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel. 2293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.ge 1b \n" 2294ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2295ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "99: \n" 2296ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2297ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb0), // %0 2298ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_argb1), // %1 2299ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 2300ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2301ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 2303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v16", "v17", "v18" 2304ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2305ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2306ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2307ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Attenuate 8 pixels at a time. 
2308ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 2309ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2310ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // Attenuate 8 pixels. 2311ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2312ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels 2314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 2315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v4.8h, v0.8b, v3.8b \n" // b * a 2316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v5.8h, v1.8b, v3.8b \n" // g * a 2317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v6.8h, v2.8b, v3.8b \n" // r * a 2318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8 2319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8 2320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8 2321ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB pixels 2323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2324ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 2325ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 2326ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) 
// %2 2327ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" 2329ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2330ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2331ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2332ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Quantize 8 ARGB pixels (32 bytes). 2333ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// dst = (dst * scale >> 16) * interval_size + interval_offset; 2334ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, 2335ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int interval_offset, int width) { 2336ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v4.8h, %w2 \n" 2338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v4.8h, v4.8h, #1 \n" // scale >>= 1 2339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v5.8h, %w3 \n" // interval multiply. 2340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v6.8h, %w4 \n" // interval add 2341ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2342ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 2344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 pixels of ARGB. 
2346da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w1, %w1, #8 \n" // 8 processed per loop. 2347da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v0.8h, v0.8b \n" // b (0 .. 255) 2348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v1.8h, v1.8b \n" 2349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v2.8h, v2.8b \n" 2350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale 2351da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqdmulh v1.8h, v1.8h, v4.8h \n" // g 2352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqdmulh v2.8h, v2.8h, v4.8h \n" // r 2353da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v0.8h, v0.8h, v5.8h \n" // b * interval_size 2354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v1.8h, v1.8h, v5.8h \n" // g 2355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v2.8h, v2.8h, v5.8h \n" // r 2356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v6.8h \n" // b + interval_offset 2357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v1.8h, v1.8h, v6.8h \n" // g 2358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v2.8h, v2.8h, v6.8h \n" // r 2359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v0.8b, v0.8h \n" 2360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v1.8b, v1.8h \n" 2361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v2.8b, v2.8h \n" 2362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB pixels 2364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 
"b.gt 1b \n" 2365ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(dst_argb), // %0 2366ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %1 2367ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "r"(scale), // %2 2368ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "r"(interval_size), // %3 2369ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "r"(interval_offset) // %4 2370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6" 2371ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2372ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2373ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2374ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Shade 8 pixels at a time by specified value. 2375ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. 2376ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. 2377ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, 2378ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint32 value) { 2379ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2380da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "dup v0.4s, %w3 \n" // duplicate scale value. 2381da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb. 2382da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ushr v0.8h, v0.8h, #1 \n" // scale / 2. 
2383ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2384ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 2386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2387da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB pixels. 2388da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 2389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v4.8h, v4.8b \n" // b (0 .. 255) 2390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v5.8h, v5.8b \n" 2391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v6.8h, v6.8b \n" 2392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v7.8h, v7.8b \n" 2393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrdmulh v4.8h, v4.8h, v0.h[0] \n" // b * scale * 2 2394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrdmulh v5.8h, v5.8h, v0.h[1] \n" // g 2395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrdmulh v6.8h, v6.8h, v0.h[2] \n" // r 2396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrdmulh v7.8h, v7.8h, v0.h[3] \n" // a 2397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v4.8b, v4.8h \n" 2398da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v5.8b, v5.8h \n" 2399da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v6.8b, v6.8h \n" 2400da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v7.8b, v7.8h \n" 2401da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 2402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, 
[%1], #32 \n" // store 8 ARGB pixels 2403da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2404ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 2405ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 2406ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %2 2407ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "r"(value) // %3 2408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v4", "v5", "v6", "v7" 2409ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2410ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2411ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2412ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels 2413ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Similar to ARGBToYJ but stores ARGB. 
2414ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; 2415ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 2416ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v24.8b, #15 \n" // B * 0.11400 coefficient 2418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v25.8b, #75 \n" // G * 0.58700 coefficient 2419da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v26.8b, #38 \n" // R * 0.29900 coefficient 2420ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2421ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2422da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 2423da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 
2424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v4.8h, v0.8b, v24.8b \n" // B 2425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v4.8h, v1.8b, v25.8b \n" // G 2426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v4.8h, v2.8b, v26.8b \n" // R 2427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B 2428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v1.8b, v0.8b, v0.8b \n" // G 2429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v2.8b, v0.8b, v0.8b \n" // R 2430ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels. 2432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2433ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 2434ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 2435ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %2 2436ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26" 2438ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2439ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2440ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2441ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
2442ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// b = (r * 35 + g * 68 + b * 17) >> 7 2443ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// g = (r * 45 + g * 88 + b * 22) >> 7 2444ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// r = (r * 50 + g * 98 + b * 24) >> 7 2445ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2446ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) { 2447ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2448da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v20.8b, #17 \n" // BB coefficient 2449da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v21.8b, #68 \n" // BG coefficient 2450da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v22.8b, #35 \n" // BR coefficient 2451da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v24.8b, #22 \n" // GB coefficient 2452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v25.8b, #88 \n" // GG coefficient 2453da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v26.8b, #45 \n" // GR coefficient 2454da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v28.8b, #24 \n" // BB coefficient 2455da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v29.8b, #98 \n" // BG coefficient 2456da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v30.8b, #50 \n" // BR coefficient 2457da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 2458da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2459da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels. 
2460da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w1, %w1, #8 \n" // 8 processed per loop. 2461da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B 2462da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v4.8h, v1.8b, v21.8b \n" // G 2463da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v4.8h, v2.8b, v22.8b \n" // R 2464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G 2465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v5.8h, v1.8b, v25.8b \n" // G 2466da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v5.8h, v2.8b, v26.8b \n" // R 2467da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R 2468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v6.8h, v1.8b, v29.8b \n" // G 2469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umlal v6.8h, v2.8b, v30.8b \n" // R 2470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B 2471da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G 2472da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R 2473da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels. 
2475da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2476ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(dst_argb), // %0 2477ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %1 2478ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", 2480da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v20", "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30" 2481ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2482ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2483ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2484ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Tranform 8 ARGB pixels (32 bytes) with color matrix. 2485ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// TODO(fbarchard): Was same as Sepia except matrix is provided. This function 2486ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// needs to saturate. Consider doing a non-saturating version. 2487ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, 2488ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const int8* matrix_argb, int width) { 2489ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2490ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 2491da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.16b}, [%3] \n" // load 3 ARGB vectors. 2492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sxtl v0.8h, v2.8b \n" // B,G coefficients s16. 
2493da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16. 2494da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 2495da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "1: \n" 2496da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(0) 2497da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 pixels. 2498da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w2, %w2, #8 \n" // 8 processed per loop. 2499da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v16.8h, v16.8b \n" // b (0 .. 255) 16 bit 2500da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v17.8h, v17.8b \n" // g 2501da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v18.8h, v18.8b \n" // r 2502da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uxtl v19.8h, v19.8b \n" // a 2503da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v22.8h, v16.8h, v0.h[0] \n" // B = B * Matrix B 2504da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v23.8h, v16.8h, v0.h[4] \n" // G = B * Matrix G 2505da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v24.8h, v16.8h, v1.h[0] \n" // R = B * Matrix R 2506da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v25.8h, v16.8h, v1.h[4] \n" // A = B * Matrix A 2507da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v4.8h, v17.8h, v0.h[1] \n" // B += G * Matrix B 2508da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v5.8h, v17.8h, v0.h[5] \n" // G += G * Matrix G 2509da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v6.8h, v17.8h, v1.h[1] \n" // R += G * Matrix R 2510da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v7.8h, v17.8h, 
v1.h[5] \n" // A += G * Matrix A 2511da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B 2512da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G 2513da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R 2514da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A 2515da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v4.8h, v18.8h, v0.h[2] \n" // B += R * Matrix B 2516da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v5.8h, v18.8h, v0.h[6] \n" // G += R * Matrix G 2517da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v6.8h, v18.8h, v1.h[2] \n" // R += R * Matrix R 2518da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v7.8h, v18.8h, v1.h[6] \n" // A += R * Matrix A 2519da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B 2520da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G 2521da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R 2522da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A 2523da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v4.8h, v19.8h, v0.h[3] \n" // B += A * Matrix B 2524da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v5.8h, v19.8h, v0.h[7] \n" // G += A * Matrix G 2525da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v6.8h, v19.8h, v1.h[3] \n" // R += A * Matrix R 2526da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "mul v7.8h, v19.8h, v1.h[7] \n" // A += A * Matrix A 
2527da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B 2528da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G 2529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R 2530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A 2531da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun v16.8b, v22.8h, #6 \n" // 16 bit to 8 bit B 2532da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun v17.8b, v23.8h, #6 \n" // 16 bit to 8 bit G 2533da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R 2534da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A 2535da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MEMACCESS(1) 2536da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 pixels. 
2537da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2538ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb), // %0 2539ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %1 2540ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %2 2541ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "r"(matrix_argb) // %3 2542da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", 2543da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "v18", "v19", "v22", "v23", "v24", "v25" 2544ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2545ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2546ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2547ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. 2548ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. 2549ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 2550ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, int width) { 2551ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2552ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2553ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2554ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2555da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 
2556ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels. 2558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 2559da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v0.8h, v0.8b, v4.8b \n" // multiply B 2560da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v1.8h, v1.8b, v5.8b \n" // multiply G 2561da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v2.8h, v2.8b, v6.8b \n" // multiply R 2562da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "umull v3.8h, v3.8b, v7.8b \n" // multiply A 2563da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B 2564da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G 2565da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R 2566da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A 2567ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2568da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels 2569da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2570ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2571ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb0), // %0 2572ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_argb1), // %1 2573ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 
2574ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2575ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2576da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 2577ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2578ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2579ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2580ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Add 2 rows of ARGB pixels together, 8 pixels at a time. 2581ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 2582ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, int width) { 2583ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2584ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2585ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2586ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2587da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 2588ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2589da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels. 2590da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
2591da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v4.8b \n" 2592da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v1.8b, v1.8b, v5.8b \n" 2593da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v2.8b, v2.8b, v6.8b \n" 2594da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v3.8b, v3.8b, v7.8b \n" 2595ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2596da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels 2597da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2598ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2599ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb0), // %0 2600ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_argb1), // %1 2601ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 2602ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2603ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2604da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 2605ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2606ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2607ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2608ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Subtract 2 rows of ARGB pixels, 8 pixels at a time. 
2609ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 2610ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, int width) { 2611ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2612ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2613ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2614ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2615da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB pixels. 2616ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2617da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more pixels. 2618da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
2619da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v0.8b, v0.8b, v4.8b \n" 2620da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v1.8b, v1.8b, v5.8b \n" 2621da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v2.8b, v2.8b, v6.8b \n" 2622da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqsub v3.8b, v3.8b, v7.8b \n" 2623ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2624da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels 2625da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2626ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2627ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_argb0), // %0 2628ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_argb1), // %1 2629ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 2630ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2631ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2632da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" 2633ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2634ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2635ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2636ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Adds Sobel X and Sobel Y and stores Sobel into ARGB. 
2637ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// A = 255 2638ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// R = Sobel 2639ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// G = Sobel 2640ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// B = Sobel 2641ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 2642ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, int width) { 2643ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2644da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v3.8b, #255 \n" // alpha 2645ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2646ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2647ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2648da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx. 2649ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2650da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely. 2651da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
2652da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.8b, v0.8b, v1.8b \n" // add 2653da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v1.8b, v0.8b, v0.8b \n" 2654da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "orr v2.8b, v0.8b, v0.8b \n" 2655ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2656da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels 2657da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2658ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_sobelx), // %0 2659ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_sobely), // %1 2660ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 2661ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2662ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2663da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" 2664ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2665ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2666ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2667ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Adds Sobel X and Sobel Y and stores Sobel into plane. 2668ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 2669ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_y, int width) { 2670ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2671ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 16 pixel loop. 
2672ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2673ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2674da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx. 2675ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2676da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely. 2677da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #16 \n" // 16 processed per loop. 2678da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v0.16b, v0.16b, v1.16b \n" // add 2679ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2680da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels. 2681da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2682ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_sobelx), // %0 2683ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_sobely), // %1 2684ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_y), // %2 2685ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2686ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2687da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1" 2688ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2689ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2690ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2691ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// Mixes Sobel X, Sobel Y and Sobel into ARGB. 
2692ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// A = 255 2693ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// R = Sobel X 2694ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// G = Sobel 2695ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// B = Sobel Y 2696ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 2697ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_argb, int width) { 2698ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2699da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "movi v3.8b, #255 \n" // alpha 2700ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian // 8 pixel loop. 2701ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 2702ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2703da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx. 2704ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2705da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely. 2706da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 processed per loop. 
2707da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqadd v1.8b, v0.8b, v2.8b \n" // add 2708ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2709da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB pixels 2710da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2711ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_sobelx), // %0 2712ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_sobely), // %1 2713ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_argb), // %2 2714ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2715ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : 2716da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" 2717ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2718ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2719ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2720ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// SobelX as a matrix is 2721ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// -1 0 1 2722ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// -2 0 2 2723ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// -1 0 1 2724ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, 2725ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian const uint8* src_y2, uint8* dst_sobelx, int width) { 2726ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2727ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "1: \n" 
2728ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2729da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0],%5 \n" // top 2730ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2731da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.8b}, [%0],%6 \n" 2732da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "usubl v0.8h, v0.8b, v1.8b \n" 2733ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2734da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%1],%5 \n" // center * 2 2735ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2736da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v3.8b}, [%1],%6 \n" 2737da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "usubl v1.8h, v2.8b, v3.8b \n" 2738da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v1.8h \n" 2739da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v1.8h \n" 2740ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2741da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%2],%5 \n" // bottom 2742ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2743da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v3.8b}, [%2],%6 \n" 2744da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w4, %w4, #8 \n" // 8 pixels 2745da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "usubl v1.8h, v2.8b, v3.8b \n" 2746da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v1.8h \n" 2747da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "abs v0.8h, v0.8h \n" 2748da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn 
v0.8b, v0.8h \n" 2749ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(3) 2750da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx 2751da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2752ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y0), // %0 2753ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_y1), // %1 2754ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_y2), // %2 2755ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_sobelx), // %3 2756ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %4 2757da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"(2LL), // %5 2758da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "r"(6LL) // %6 2759da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 2760ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2761ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2762ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2763ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// SobelY as a matrix is 2764ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// -1 -2 -1 2765ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// 0 0 0 2766ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian// 1 2 1 2767ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, 2768ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian uint8* dst_sobely, int width) { 2769ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian asm volatile ( 2770ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh 
Venkatasubramanian "1: \n" 2771ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2772da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v0.8b}, [%0],%4 \n" // left 2773ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2774da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v1.8b}, [%1],%4 \n" 2775da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "usubl v0.8h, v0.8b, v1.8b \n" 2776ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2777da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%0],%4 \n" // center * 2 2778ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2779da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v3.8b}, [%1],%4 \n" 2780da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "usubl v1.8h, v2.8b, v3.8b \n" 2781da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v1.8h \n" 2782da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v1.8h \n" 2783ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(0) 2784da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v2.8b}, [%0],%5 \n" // right 2785ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(1) 2786da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "ld1 {v3.8b}, [%1],%5 \n" 2787da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "subs %w3, %w3, #8 \n" // 8 pixels 2788da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "usubl v1.8h, v2.8b, v3.8b \n" 2789da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "add v0.8h, v0.8h, v1.8h \n" 2790da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "abs v0.8h, v0.8h \n" 
2791da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "uqxtn v0.8b, v0.8h \n" 2792ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian MEMACCESS(2) 2793da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely 2794da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "b.gt 1b \n" 2795ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian : "+r"(src_y0), // %0 2796ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(src_y1), // %1 2797ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(dst_sobely), // %2 2798ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian "+r"(width) // %3 2799da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "r"(1LL), // %4 2800da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "r"(6LL) // %5 2801da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 2802ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian ); 2803ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 2804ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 2805ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2806ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#ifdef __cplusplus 2807ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} // extern "C" 2808ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} // namespace libyuv 2809ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#endif 2810