193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com/* 2b0c97975894a5eebebf9d93147cdd941a3accb63fbarchard@google.com * Copyright 2011 The LibYuv Project Authors. All rights reserved. 393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * 493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * Use of this source code is governed by a BSD-style license 593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * that can be found in the LICENSE file in the root of the source 693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * tree. An additional intellectual property rights grant can be found 7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com * in the file PATENTS. All contributing project authors may 893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * be found in the AUTHORS file in the root of the source tree. 993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com */ 1093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 11142f6c4ed5eaeec0176f255e64bac8d8c70b42e1fbarchard@google.com#include "libyuv/row.h" 1293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 13fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus 14fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comnamespace libyuv { 15fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comextern "C" { 16fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif 17fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com 182d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com// This module is for GCC Neon 191b9df4c5c85ca9ff161249a8ffbaeda2f5edf5dffbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) 202d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 214807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y, 4 U and 4 V from 422 224807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READYUV422 \ 
230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 250bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) \ 262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.32 {d2[0]}, [%1]! \n" \ 270bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) \ 282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.32 {d2[1]}, [%2]! \n" 294807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 30b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com// Read 8 Y, 2 U and 2 V from 422 31b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com#define READYUV411 \ 320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) \ 352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.16 {d2[0]}, [%1]! \n" \ 360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) \ 372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.16 {d2[1]}, [%2]! \n" \ 38b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d3, d2 \n" \ 39b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vzip.u8 d2, d3 \n" 40b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 41b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com// Read 8 Y, 8 U and 8 V from 444 42b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com#define READYUV444 \ 430bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 442c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) \ 462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1]! 
\n" \ 470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) \ 482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d3}, [%2]! \n" \ 49b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vpaddl.u8 q1, q1 \n" \ 50b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vrshrn.u16 d2, q1, #1 \n" 51b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 5200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com// Read 8 Y, and set 4 U and 4 V to 128 5300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com#define READYUV400 \ 540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 552c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 5600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d2, #128 \n" 5700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com 584807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y and 4 UV from NV12 594807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READNV12 \ 600bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) \ 632c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1]! 
\n" \ 644807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 654807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vuzp.u8 d2, d3 \n" \ 66793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 674807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 684807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y and 4 VU from NV21 694807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READNV21 \ 700bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 712c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 720bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) \ 732c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1]! \n" \ 744807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vuzp.u8 d3, d2 \n" \ 76793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 77793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 78793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com// Read 8 YUY2 79793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com#define READYUY2 \ 800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 812c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {d0, d2}, [%0]! 
\n" \ 82793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d3, d2 \n" \ 83793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vuzp.u8 d2, d3 \n" \ 84793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 85793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 86793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com// Read 8 UYVY 87793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com#define READUYVY \ 880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) \ 892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {d2, d3}, [%0]! \n" \ 90793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d0, d3 \n" \ 91793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d3, d2 \n" \ 92793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vuzp.u8 d2, d3 \n" \ 93793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 944807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 954807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define YUV422TORGB \ 9693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\ 9793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\ 9893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmull.s8 q9, d2, d25 \n"/* u/v G component */\ 9993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d1, #0 \n"/* split odd/even y apart */\ 10093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d0, d1 \n" \ 10193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vsub.s16 q0, q0, q15 \n"/* offset y */\ 10293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmul.s16 q0, q0, q14 \n" \ 10393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vadd.s16 d18, d19 \n" \ 
104c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vqadd.s16 d20, d0, d16 \n" /* B */ \ 10593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vqadd.s16 d21, d1, d16 \n" \ 106c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vqadd.s16 d22, d0, d17 \n" /* R */ \ 10793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vqadd.s16 d23, d1, d17 \n" \ 108c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vqadd.s16 d16, d0, d18 \n" /* G */ \ 10993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vqadd.s16 d17, d1, d18 \n" \ 110d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com "vqshrun.s16 d0, q10, #6 \n" /* B */ \ 111d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com "vqshrun.s16 d1, q11, #6 \n" /* G */ \ 112d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com "vqshrun.s16 d2, q8, #6 \n" /* R */ \ 11393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\ 11493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmovl.u8 q11, d1 \n" \ 11593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmovl.u8 q8, d2 \n" \ 11693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d20, d21 \n" \ 11793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d22, d23 \n" \ 11893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d16, d17 \n" \ 1194807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d21, d16 \n" 12093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 121f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102, 122f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com 0, 0, 0, 0, 0, 0, 0, 0 }; 123f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52, 124f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com 0, 
0, 0, 0, 0, 0, 0, 0 }; 12593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 126b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I444ToARGBRow_NEON(const uint8* src_y, 127b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_u, 128b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_v, 129b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com uint8* dst_argb, 130b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com int width) { 131b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com asm volatile ( 1320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 1332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 1340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 1352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 136b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d26, #128 \n" 137b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q14, #74 \n" 138b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q15, #16 \n" 139c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 140b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "1: \n" 141b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com READYUV444 142b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com YUV422TORGB 143b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "subs %4, %4, #8 \n" 144b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d23, #255 \n" 1450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 146b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 147b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "bgt 1b \n" 148b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "+r"(src_y), // %0 149b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_u), // %1 150b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_v), // %2 151b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(dst_argb), // %3 152b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(width) // %4 153b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "r"(&kUVToRB), // %5 154b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "r"(&kUVToG) // %6 155b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 156b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 157b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com ); 158b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com} 159b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 1609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToARGBRow_NEON(const uint8* src_y, 1619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 1629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 1639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_argb, 164e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 1655b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 1660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 1672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 1680bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 1692c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 17093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 
"vmov.u8 d26, #128 \n" 17193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q14, #74 \n" 17293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q15, #16 \n" 173c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 17493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "1: \n" 1754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 176e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com YUV422TORGB 1774807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 17893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d23, #255 \n" 1790bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 180dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 18118184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 1829de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 1839de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 1849de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 1859de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_argb), // %3 1869de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 1879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 1889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 18964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 19064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 19193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com ); 19293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com} 19393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 194b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid 
I411ToARGBRow_NEON(const uint8* src_y, 195b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_u, 196b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_v, 197b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com uint8* dst_argb, 198b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com int width) { 199b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com asm volatile ( 2000bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 2012c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 2020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 2032c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 204b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d26, #128 \n" 205b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q14, #74 \n" 206b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q15, #16 \n" 207c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 208b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "1: \n" 209b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com READYUV411 210b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com YUV422TORGB 211b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "subs %4, %4, #8 \n" 212b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d23, #255 \n" 2130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 214b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 215b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "bgt 1b \n" 216b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "+r"(src_y), // %0 217b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_u), // %1 218b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_v), // %2 219b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(dst_argb), // %3 220b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(width) // %4 221b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "r"(&kUVToRB), // %5 222b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "r"(&kUVToG) // %6 223b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 224b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 225b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com ); 226b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com} 227b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 2289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToBGRARow_NEON(const uint8* src_y, 2299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 2309de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 2319de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_bgra, 232e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2335b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 2340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 2352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 2360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 2372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 23893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 
"vmov.u8 d26, #128 \n" 23993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q14, #74 \n" 24093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q15, #16 \n" 241c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 24293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "1: \n" 2434807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 244e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com YUV422TORGB 2454807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 24693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vswp.u8 d20, d22 \n" 24793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d19, #255 \n" 2480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 249dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d19, d20, d21, d22}, [%3]! \n" 25018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 2519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 2529de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 2539de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 2549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_bgra), // %3 2559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 2569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 2579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 25864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 25964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 26093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com ); 26193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com} 
26293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 2639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToABGRRow_NEON(const uint8* src_y, 2649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 2659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 2669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_abgr, 267e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2685b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 2690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 2702c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 2710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 2722c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 27393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d26, #128 \n" 27493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q14, #74 \n" 27593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q15, #16 \n" 276c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 27793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "1: \n" 2784807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 279e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com YUV422TORGB 2804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 28193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vswp.u8 d20, d22 \n" 28293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d23, #255 \n" 2830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 284dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 28518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 2869de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 2879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 2889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 2899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_abgr), // %3 2909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 2919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 2929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 29364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 29464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 29593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com ); 29693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com} 297fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com 2989de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGBARow_NEON(const uint8* src_y, 2999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 3009de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 3019de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_rgba, 3022d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com int width) { 3032d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com asm volatile ( 3040bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 3052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 3060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 3072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 3082d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com 
"vmov.u8 d26, #128 \n" 3092d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u16 q14, #74 \n" 3102d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u16 q15, #16 \n" 311c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 3122d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "1: \n" 3134807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 3142d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com YUV422TORGB 3154807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 3162d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u8 d19, #255 \n" 3170bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 318dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d19, d20, d21, d22}, [%3]! \n" 3192d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "bgt 1b \n" 3209de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 3219de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 3229de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 3239de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_rgba), // %3 3249de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 3259de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 3269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 3292d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com ); 3302d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com} 3312d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com 
3329de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGB24Row_NEON(const uint8* src_y, 333834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 334834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 335834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_rgb24, 336834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 3380bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 3392c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 3400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 3412c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u8 d26, #128 \n" 34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q14, #74 \n" 34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q15, #16 \n" 345c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "1: \n" 34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com READYUV422 34864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com YUV422TORGB 34964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "subs %4, %4, #8 \n" 3500bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 35164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vst3.8 {d20, d21, d22}, [%3]! 
\n" 35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 3539de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 3549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 3559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 3569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_rgb24), // %3 3579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 3589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 3599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 36064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 36164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 36264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 36364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 36464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 3659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRAWRow_NEON(const uint8* src_y, 3669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 3679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 3689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_raw, 36964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com int width) { 37064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 3710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 3722c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 3730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 3742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 37564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 
"vmov.u8 d26, #128 \n" 37664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q14, #74 \n" 37764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q15, #16 \n" 378c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 37964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "1: \n" 38064ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com READYUV422 38164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com YUV422TORGB 38264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "subs %4, %4, #8 \n" 38364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vswp.u8 d20, d22 \n" 3840bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 38564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vst3.8 {d20, d21, d22}, [%3]! \n" 38664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 3879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 3889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 3899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 3909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_raw), // %3 39164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "+r"(width) // %4 39264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "r"(&kUVToRB), // %5 39364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "r"(&kUVToG) // %6 39464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 39564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 39664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 39764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 39864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 
39911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTORGB565 \ 40011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d20, d20, #3 \n" /* B */ \ 40111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d21, d21, #2 \n" /* G */ \ 40211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d22, d22, #3 \n" /* R */ \ 40311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q8, d20 \n" /* B */ \ 40411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q9, d21 \n" /* G */ \ 40511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q10, d22 \n" /* R */ \ 40611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q9, q9, #5 \n" /* G */ \ 40711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q10, q10, #11 \n" /* R */ \ 40811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q8, q9 \n" /* BG */ \ 40911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q0, q10 \n" /* BGR */ 41011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 41115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.comvoid I422ToRGB565Row_NEON(const uint8* src_y, 412834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 413834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 414834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_rgb565, 415834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 41615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com asm volatile ( 4170bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 4182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 4190bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 4202c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 
42115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vmov.u8 d26, #128 \n" 42215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vmov.u16 q14, #74 \n" 42315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vmov.u16 q15, #16 \n" 424c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 42515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "1: \n" 42615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com READYUV422 42715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com YUV422TORGB 42815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "subs %4, %4, #8 \n" 42911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTORGB565 4300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 43115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. 43215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "bgt 1b \n" 43315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com : "+r"(src_y), // %0 43415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(src_u), // %1 43515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(src_v), // %2 43615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(dst_rgb565), // %3 43715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(width) // %4 43815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com : "r"(&kUVToRB), // %5 43915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "r"(&kUVToG) // %6 44015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 44115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 44215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com ); 44315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com} 
44415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 44511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTOARGB1555 \ 44611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 q10, q10, #3 \n" /* B */ \ 44711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d22, d22, #3 \n" /* R */ \ 44811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d23, d23, #7 \n" /* A */ \ 44911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q8, d20 \n" /* B */ \ 45011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q9, d21 \n" /* G */ \ 45111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q10, d22 \n" /* R */ \ 45211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q11, d23 \n" /* A */ \ 45311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q9, q9, #5 \n" /* G */ \ 45411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q10, q10, #10 \n" /* R */ \ 45511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q11, q11, #15 \n" /* A */ \ 45611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q8, q9 \n" /* BG */ \ 45711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q1, q10, q11 \n" /* RA */ \ 45811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q0, q1 \n" /* BGRA */ 45911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 46011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB1555Row_NEON(const uint8* src_y, 461834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 462834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 463834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_argb1555, 464834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 46511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 
asm volatile ( 4660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 4672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 4680bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 4692c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 47011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d26, #128 \n" 47111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q14, #74 \n" 47211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q15, #16 \n" 473c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 47411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "1: \n" 47511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com READYUV422 47611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com YUV422TORGB 47711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "subs %4, %4, #8 \n" 47811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d23, #255 \n" 47911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB1555 4800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 48111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555. 
48211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "bgt 1b \n" 48311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "+r"(src_y), // %0 48411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_u), // %1 48511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_v), // %2 48611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(dst_argb1555), // %3 48711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(width) // %4 48811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "r"(&kUVToRB), // %5 48911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "r"(&kUVToG) // %6 49011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 49111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 49211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ); 49311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com} 49411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 49511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTOARGB4444 \ 49611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d20, d20, #4 \n" /* B */ \ 49711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vbic.32 d21, d21, d4 \n" /* G */ \ 49811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d22, d22, #4 \n" /* R */ \ 49911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vbic.32 d23, d23, d4 \n" /* A */ \ 50011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr d0, d20, d21 \n" /* BG */ \ 50111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr d1, d22, d23 \n" /* RA */ \ 50211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vzip.u8 d0, d1 \n" /* BGRA */ 50311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 50411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid 
I422ToARGB4444Row_NEON(const uint8* src_y, 505834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 506834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 507834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_argb4444, 508834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 50911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com asm volatile ( 5100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 5112c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 5120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(6) 5132c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 51411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d26, #128 \n" 51511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q14, #74 \n" 51611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q15, #16 \n" 51711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. 518c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 51911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "1: \n" 52011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com READYUV422 52111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com YUV422TORGB 52211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "subs %4, %4, #8 \n" 52311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d23, #255 \n" 52411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB4444 5250bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 52611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444. 
52711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "bgt 1b \n" 52811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "+r"(src_y), // %0 52911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_u), // %1 53011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_v), // %2 53111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(dst_argb4444), // %3 53211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(width) // %4 53311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "r"(&kUVToRB), // %5 53411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "r"(&kUVToG) // %6 53511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 53611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 53711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ); 53811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com} 53911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 54000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid YToARGBRow_NEON(const uint8* src_y, 54100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com uint8* dst_argb, 54200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com int width) { 54300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com asm volatile ( 5440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 5450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 5462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%3] \n" 5470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 5482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%4] \n" 54900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d26, #128 \n" 55000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u16 
q14, #74 \n" 55100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u16 q15, #16 \n" 552c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 55300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "1: \n" 55400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com READYUV400 55500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com YUV422TORGB 55600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "subs %2, %2, #8 \n" 55700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d23, #255 \n" 5580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 55900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 56000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "bgt 1b \n" 56100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "+r"(src_y), // %0 56200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(dst_argb), // %1 56300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(width) // %2 56400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "r"(&kUVToRB), // %3 56500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "r"(&kUVToG) // %4 56600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 56700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 56800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com ); 56900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com} 57000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com 57100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid I400ToARGBRow_NEON(const uint8* src_y, 57200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com uint8* dst_argb, 57300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com int width) { 
57400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com asm volatile ( 575c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 57600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d23, #255 \n" 57700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "1: \n" 5780bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 5792c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d20}, [%0]! \n" 58000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov d21, d20 \n" 58100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov d22, d20 \n" 58200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "subs %2, %2, #8 \n" 5830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 58400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 58500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "bgt 1b \n" 58600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "+r"(src_y), // %0 58700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(dst_argb), // %1 58800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(width) // %2 58900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : 59000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "cc", "memory", "d20", "d21", "d22", "d23" 59100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com ); 59200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com} 59300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com 5949de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV12ToARGBRow_NEON(const uint8* src_y, 5959de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_uv, 5969de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_argb, 5974807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com int width) { 
5984807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com asm volatile ( 5990bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 6002c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 6010bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 6022c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 6034807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d26, #128 \n" 6044807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q14, #74 \n" 6054807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q15, #16 \n" 606c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 6074807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "1: \n" 6084807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READNV12 6094807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com YUV422TORGB 6104807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #8 \n" 6114807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d23, #255 \n" 6120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 6134807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" 6144807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "bgt 1b \n" 6159de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 6169de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_uv), // %1 6179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_argb), // %2 6189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %3 6199de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %4 6209de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %5 62164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 62264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 6234807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com ); 6244807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 6254807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 6269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV21ToARGBRow_NEON(const uint8* src_y, 6279de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_uv, 6289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_argb, 6294807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com int width) { 6304807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com asm volatile ( 6310bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 6322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 6330bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 6342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 6354807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d26, #128 \n" 6364807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q14, #74 \n" 
6374807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q15, #16 \n" 638c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 6394807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "1: \n" 6404807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READNV21 6414807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com YUV422TORGB 6424807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #8 \n" 6434807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d23, #255 \n" 6440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 6454807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%2]! \n" 6464807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "bgt 1b \n" 6479de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 6489de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_uv), // %1 6499de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_argb), // %2 6509de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %3 6519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %4 6529de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %5 65364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 65464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 6554807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com ); 6564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 6574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 6589f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV12ToRGB565Row_NEON(const uint8* src_y, 659bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com const uint8* src_uv, 
660bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgb565, 661bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com int width) { 6629f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com asm volatile ( 6630bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 6642c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 6650bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 6662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 6679f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u8 d26, #128 \n" 6689f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q14, #74 \n" 6699f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q15, #16 \n" 670c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 6719f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "1: \n" 6729f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com READNV12 6739f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com YUV422TORGB 6749f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "subs %3, %3, #8 \n" 6759f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ARGBTORGB565 6760bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 6779f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 
6789f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "bgt 1b \n" 6799f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "+r"(src_y), // %0 6809f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(src_uv), // %1 6819f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(dst_rgb565), // %2 6829f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(width) // %3 6839f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "r"(&kUVToRB), // %4 6849f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "r"(&kUVToG) // %5 6859f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 6869f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 6879f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ); 6889f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com} 6899f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com 6909f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV21ToRGB565Row_NEON(const uint8* src_y, 691bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com const uint8* src_uv, 692bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgb565, 693bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com int width) { 6949f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com asm volatile ( 6950bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 6962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 6970bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(5) 6982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 6999f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u8 d26, #128 \n" 7009f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q14, #74 \n" 
7019f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q15, #16 \n" 702c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 7039f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "1: \n" 7049f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com READNV21 7059f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com YUV422TORGB 7069f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "subs %3, %3, #8 \n" 7079f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ARGBTORGB565 7080bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 7099f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 7109f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "bgt 1b \n" 7119f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "+r"(src_y), // %0 7129f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(src_uv), // %1 7139f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(dst_rgb565), // %2 7149f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(width) // %3 7159f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "r"(&kUVToRB), // %4 7169f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "r"(&kUVToG) // %5 7179f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 7189f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 7199f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ); 7209f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com} 7219f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com 722793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid YUY2ToARGBRow_NEON(const uint8* src_yuy2, 723793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com uint8* dst_argb, 724793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 
int width) { 725793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com asm volatile ( 7260bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 7272c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%3] \n" 7280bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 7292c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%4] \n" 730793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d26, #128 \n" 731793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q14, #74 \n" 732793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q15, #16 \n" 733c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 734793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "1: \n" 735793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com READYUY2 736793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com YUV422TORGB 737793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "subs %2, %2, #8 \n" 738793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d23, #255 \n" 7390bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 740793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" 741793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "bgt 1b \n" 742793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "+r"(src_yuy2), // %0 743793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(dst_argb), // %1 744793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(width) // %2 745793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "r"(&kUVToRB), // %3 746793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "r"(&kUVToG) // %4 747793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 748793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 749793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com ); 750793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com} 751793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 752793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid UYVYToARGBRow_NEON(const uint8* src_uyvy, 753793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com uint8* dst_argb, 754793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com int width) { 755793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com asm volatile ( 7560bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 7572c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%3] \n" 7580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(4) 7592c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%4] \n" 760793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d26, #128 \n" 761793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q14, #74 \n" 762793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q15, #16 \n" 763c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 
764793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "1: \n" 765793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com READUYVY 766793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com YUV422TORGB 767793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "subs %2, %2, #8 \n" 768793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d23, #255 \n" 7690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 770793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 771793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "bgt 1b \n" 772793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "+r"(src_uyvy), // %0 773793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(dst_argb), // %1 774793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(width) // %2 775793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "r"(&kUVToRB), // %3 776793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "r"(&kUVToG) // %4 777793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 778793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 779793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com ); 780793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com} 781793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 7824a86a836fcde981b6c3fd3f4a216a3253a2d26bcfbarchard@google.com// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. 
783f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 784f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int width) { 7855b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 786c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 7872d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com "1: \n" 7880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 7892c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV 7904807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 7910bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 7922c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store U 7930bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 7942c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%2]! 
\n" // store V 79518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 7968536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com : "+r"(src_uv), // %0 7978536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com "+r"(dst_u), // %1 7988536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com "+r"(dst_v), // %2 79916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com "+r"(width) // %3 // Output registers 8002d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com : // Input registers 8018f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 8022d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com ); 8032d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com} 8042d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 80562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com// Reads 16 U's and V's and writes out 16 pairs of UV. 806f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 807f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int width) { 80862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com asm volatile ( 809c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 81062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "1: \n" 8110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 8122c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load U 8130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 8142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! 
\n" // load V 81562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 8160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 817d26b4514726a9a7476f6dfb6730cda2b422bf550fbarchard@google.com "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV 81862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "bgt 1b \n" 81962a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com : 82062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(src_u), // %0 82162a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(src_v), // %1 82262a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(dst_uv), // %2 82362a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(width) // %3 // Output registers 82462a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com : // Input registers 8258f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 82662a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com ); 82762a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com} 828834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com 8292c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. 83019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.comvoid CopyRow_NEON(const uint8* src, uint8* dst, int count) { 8315b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 832c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 83319932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com "1: \n" 8340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 8352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0, d1, d2, d3}, [%0]! 
\n" // load 32 83662a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "subs %2, %2, #32 \n" // 32 processed per loop 8370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 8382c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32 83918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 8403e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src), // %0 8413e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst), // %1 8423e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(count) // %2 // Output registers 8433e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : // Input registers 8448f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 84519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com ); 84619932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com} 84719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com 84864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow8 writes 'count' bytes using a 32 bit value repeated. 849f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SetRow_NEON(uint8* dst, uint32 v32, int count) { 8503e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com asm volatile ( 8514807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vdup.u32 q0, %2 \n" // duplicate 4 ints 8524807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "1: \n" 8534807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %1, %1, #16 \n" // 16 bytes per loop 8540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 8552c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" // store 8564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "bgt 1b \n" 8573e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(dst), // %0 8583e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(count) // %1 8593e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "r"(v32) // %2 8608f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0" 8613e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 8624807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 8634807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 8644807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// TODO(fbarchard): Make fully assembler 86564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow32 writes 'count' words using a 32 bit value repeated. 866f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, 8671b40d8caa2811759aa5fa87f2e23061d26f8968cfbarchard@google.com int dst_stride, int height) { 8684807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com for (int y = 0; y < height; ++y) { 869f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com SetRow_NEON(dst, v32, width << 2); 8704807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com dst += dst_stride; 8714807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com } 8724807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 8734807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 87416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.comvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) { 8755b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 8763e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com // Start at end of source row. 
8773e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "mov r3, #-16 \n" 8783e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "add %0, %0, %2 \n" 8793e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "sub %0, #16 \n" 8803e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 881c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 88282069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "1: \n" 8830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 8843e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vld1.8 {q0}, [%0], r3 \n" // src -= 16 8853e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "subs %2, #16 \n" // 16 pixels per loop. 8863e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vrev64.8 q0, q0 \n" 8870bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 8883e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // dst += 16 8890bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 8903e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d0}, [%1]! 
\n" 8913e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "bgt 1b \n" 8923e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src), // %0 8933e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst), // %1 8943e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %2 8953e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 8968f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "r3", "q0" 89716a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com ); 89816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com} 89916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 900752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.comvoid MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 901752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com int width) { 9025b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 9033e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com // Start at end of source row. 904752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com "mov r12, #-16 \n" 9053e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "add %0, %0, %3, lsl #1 \n" 9063e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "sub %0, #16 \n" 9073e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 908c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 90982069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "1: \n" 9100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 911752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16 9123e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "subs %3, #8 \n" // 8 pixels per loop. 
9133e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vrev64.8 q0, q0 \n" 9140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 9153e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // dst += 8 9160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 9173e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d1}, [%2]! \n" 9183e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "bgt 1b \n" 9193e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_uv), // %0 9203e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_u), // %1 9213e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_v), // %2 9223e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %3 9233e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 9248f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "r12", "q0" 92516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com ); 92616a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com} 9273e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 9283e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.comvoid ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { 9293e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com asm volatile ( 9303e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com // Start at end of source row. 
9313e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "mov r3, #-16 \n" 9323e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "add %0, %0, %2, lsl #2 \n" 9333e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "sub %0, #16 \n" 9343e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 935c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 9363e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "1: \n" 9370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 9383e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vld1.8 {q0}, [%0], r3 \n" // src -= 16 9393e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "subs %2, #4 \n" // 4 pixels per loop. 9403e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vrev64.32 q0, q0 \n" 9410bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 9423e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // dst += 16 9430bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 9443e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d0}, [%1]! 
\n" 9453e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "bgt 1b \n" 9463e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src), // %0 9473e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst), // %1 9483e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %2 9493e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 9508f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "r3", "q0" 9513e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 9523e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com} 95316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 954797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) { 955797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com asm volatile ( 956275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vmov.u8 d4, #255 \n" // Alpha 957c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 958797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "1: \n" 9590bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 960275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. 96182069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 9620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 9634807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 
964797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "bgt 1b \n" 965797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com : "+r"(src_rgb24), // %0 966dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_argb), // %1 967dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %2 968797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com : 9698f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 970797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com ); 971797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com} 972797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com 973797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { 974797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com asm volatile ( 975275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vmov.u8 d4, #255 \n" // Alpha 976c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 977797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "1: \n" 9780bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 979275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. 9804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 981275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vswp.u8 d1, d3 \n" // swap R, B 9820bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 983275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 
984797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "bgt 1b \n" 985dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_raw), // %0 986797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "+r"(dst_argb), // %1 987797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "+r"(pix) // %2 988797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com : 9898f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 990797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com ); 991797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com} 992797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com 993bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com#define RGB565TOARGB \ 994f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ 995f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ 996f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ 997f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ 998f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ 999f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ 1000f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vorr.u8 d0, d0, d4 \n" /* B */ \ 1001f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ 1002f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vorr.u8 d2, d1, d5 \n" /* R */ \ 1003f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vorr.u8 d1, d4, d6 \n" /* G */ 1004bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 
1005bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { 1006bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 1007bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d3, #255 \n" // Alpha 1008c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1009bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 10100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 10114b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 1012bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 1013bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com RGB565TOARGB 10140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1015bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
1016bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 1017bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgb565), // %0 1018bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_argb), // %1 1019bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 1020bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 10218f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 1022bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 1023bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 1024bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 10254b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB1555TOARGB \ 10264b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ 10274b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ 10284b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \ 10294b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ 10304b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ 10314b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ 10324b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ 10334b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ 10344b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \ 10354b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ 
10364b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q1, q1, q3 \n" /* R,A */ \ 10374b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q0, q0, q2 \n" /* B,G */ \ 10384b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 1039522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 1040522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com#define RGB555TOARGB \ 1041522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ 1042522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ 1043522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ 1044522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ 1045522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ 1046522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ 1047522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vorr.u8 d0, d0, d4 \n" /* B */ \ 1048522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ 1049522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vorr.u8 d2, d1, d5 \n" /* R */ \ 1050522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vorr.u8 d1, d4, d6 \n" /* G */ 1051522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 10524b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, 10534b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com int pix) { 10544b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com asm volatile ( 
10554b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vmov.u8 d3, #255 \n" // Alpha 1056c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 10574b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "1: \n" 10580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 10594b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. 10604b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 10614b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ARGB1555TOARGB 10620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 10634b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 10644b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "bgt 1b \n" 10654b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : "+r"(src_argb1555), // %0 10664b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(dst_argb), // %1 10674b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(pix) // %2 10684b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : 10698f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 10704b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ); 10714b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com} 10724b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 10734b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB4444TOARGB \ 10744b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ 10754b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ 10764b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ 
10774b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ 10784b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ 10794b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ 10804b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ 10814b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ 10824b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 10834b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, 10844b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com int pix) { 10854b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com asm volatile ( 10864b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vmov.u8 d3, #255 \n" // Alpha 1087c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 10884b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "1: \n" 10890bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 10904b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 10914b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 10924b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ARGB4444TOARGB 10930bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 10944b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
10954b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "bgt 1b \n" 10964b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : "+r"(src_argb4444), // %0 10974b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(dst_argb), // %1 10984b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(pix) // %2 10994b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : 11008f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2" // Clobber List 11014b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ); 11024b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com} 11034b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 110464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.comvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) { 110564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com asm volatile ( 1106c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 110764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "1: \n" 11080bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1109275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 111082069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 11110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 11124807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24. 
111364961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "bgt 1b \n" 111464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com : "+r"(src_argb), // %0 111564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "+r"(dst_rgb24), // %1 111664961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "+r"(pix) // %2 111764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com : 11188f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 111964961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com ); 112064961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com} 112164961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com 11225808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.comvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { 11235808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com asm volatile ( 1124c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 11255808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "1: \n" 11260bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1127275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 11284807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 1129275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vswp.u8 d1, d3 \n" // swap R, B 11300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1131275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. 
11325808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "bgt 1b \n" 11335808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com : "+r"(src_argb), // %0 11345808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "+r"(dst_raw), // %1 11355808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "+r"(pix) // %2 11365808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com : 11378f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 11385808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com ); 11395808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com} 11405808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com 1141dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { 1142dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1143c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1144dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 11450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 11462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. 114782069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #16 \n" // 16 processed per loop. 11480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 11492c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y. 
1150dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1151dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_yuy2), // %0 1152dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_y), // %1 1153dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %2 1154dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 11558f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 1156dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1157dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1158dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1159dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { 1160dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1161c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1162dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 11630bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 11642c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. 116582069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #16 \n" // 16 processed per loop. 11660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 11672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y. 
1168dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1169dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_uyvy), // %0 1170dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_y), // %1 1171dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %2 1172dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 11738f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 1174dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1175dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1176dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1177dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, 1178dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com int pix) { 1179dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1180c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1181dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 11820bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1183dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. 11844807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 11850bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 11862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // store 8 U. 11870bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 11882c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%2]! \n" // store 8 V. 
1189dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1190dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_yuy2), // %0 1191dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_u), // %1 1192dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_v), // %2 1193dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %3 1194dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 11958f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List 1196dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1197dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1198dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1199dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, 1200dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com int pix) { 1201dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1202c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1203dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 12040bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1205dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. 12064807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 12070bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 12082c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 U. 12090bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 12102c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d2}, [%2]! \n" // store 8 V. 
1211dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1212dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_uyvy), // %0 1213dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_u), // %1 1214dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_v), // %2 1215dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %3 1216dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 12178f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List 1218dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1219dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1220dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1221dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, 1222dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1223dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1224dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "add %1, %0, %1 \n" // stride + src_yuy2 1225c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1226dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 12270bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1228dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. 12294807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 12300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1231dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. 
1232dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d1, d1, d5 \n" // average rows of U 1233dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d3, d3, d7 \n" // average rows of V 12340bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 12352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 U. 12360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 12372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%3]! \n" // store 8 V. 1238dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1239cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com : "+r"(src_yuy2), // %0 1240dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(stride_yuy2), // %1 1241cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_u), // %2 1242cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_v), // %3 1243cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(pix) // %4 1244dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 12458f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List 1246dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1247dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1248dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1249dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, 1250dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1251dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1252dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "add %1, %0, %1 \n" // stride + src_uyvy 1253c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 
1254dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 12550bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1256dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. 12574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 12580bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1259dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY. 1260dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d0, d0, d4 \n" // average rows of U 1261dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d2, d2, d6 \n" // average rows of V 12620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 12632c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 U. 12640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 12652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d2}, [%3]! \n" // store 8 V. 
1266dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1267cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com : "+r"(src_uyvy), // %0 1268dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(stride_uyvy), // %1 1269cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_u), // %2 1270cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_v), // %3 1271cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(pix) // %4 1272dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 12738f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List 1274dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1275dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 12764807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 1277e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.comvoid HalfRow_NEON(const uint8* src_uv, int src_uv_stride, 1278e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com uint8* dst_uv, int pix) { 1279e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com asm volatile ( 1280e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com // change the stride to row 2 pointer 1281e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "add %1, %0 \n" 1282f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com "1: \n" 12830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 12842c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels. 1285e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 12860bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 12872c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels. 
1288e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "vrhadd.u8 q0, q1 \n" // average row 1 and 2 12890bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 12902c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%2]! \n" 1291e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "bgt 1b \n" 12923e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_uv), // %0 12933e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_uv_stride), // %1 12943e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_uv), // %2 12953e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(pix) // %3 12963e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 12978f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 12983e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 1299e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com} 1300e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com 13018d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG 13021096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, 13031096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com uint32 selector, int pix) { 1304f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com asm volatile ( 1305c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vmov.u32 d6[0], %3 \n" // selector 1306f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com "1: \n" 13070bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 13082c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels. 
1309c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 1310c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels 1311c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels 1312c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vtrn.u32 d4, d5 \n" // combine 8 pixels 13130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 13142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d4}, [%1]! \n" // store 8. 1315f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com "bgt 1b \n" 13164f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com : "+r"(src_argb), // %0 13174f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com "+r"(dst_bayer), // %1 13184f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com "+r"(pix) // %2 13194f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com : "r"(selector) // %3 1320c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 13211096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com ); 13221096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com} 13231096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 132408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com// Select G channels from ARGB. e.g. 
GGGGGGGG 132508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.comvoid ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, 132608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com uint32 /*selector*/, int pix) { 132708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com asm volatile ( 132808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "1: \n" 13290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 133008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels. 133108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 13320bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 133308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // store 8 G's. 133408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "bgt 1b \n" 133508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com : "+r"(src_argb), // %0 133608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "+r"(dst_bayer), // %1 133708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "+r"(pix) // %2 133808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com : 133908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 134008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com ); 134108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com} 134208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com 13431096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
13441096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, 13451096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com const uint8* shuffler, int pix) { 13461096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com asm volatile ( 13470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 13482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q2}, [%3] \n" // shuffler 13491096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "1: \n" 13500bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 13512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 4 pixels. 13521096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "subs %2, %2, #4 \n" // 4 processed per loop 13531096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels 13541096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels 13550bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 13562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%1]! \n" // store 4. 
13571096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "bgt 1b \n" 13581096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com : "+r"(src_argb), // %0 13591096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "+r"(dst_argb), // %1 13601096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "+r"(pix) // %2 13611096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com : "r"(shuffler) // %3 13621096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com : "cc", "memory", "q0", "q1", "q2" // Clobber List 13633e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 13648d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com} 13658d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com 13669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToYUY2Row_NEON(const uint8* src_y, 13679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 13689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 13699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_yuy2, int width) { 13709de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com asm volatile ( 1371c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 13729de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "1: \n" 13730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 13749de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys 13750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 13769de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d1}, [%1]! \n" // load 8 Us 13770bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 13789de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d3}, [%2]! 
\n" // load 8 Vs 13799de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels 13800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 13812c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels. 13829de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "bgt 1b \n" 13833e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_y), // %0 13843e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_u), // %1 13853e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_v), // %2 13863e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_yuy2), // %3 13873e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %4 13883e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 13893e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" 13909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com ); 13919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com} 13929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 13939de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToUYVYRow_NEON(const uint8* src_y, 13949de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 13959de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 13969de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_uyvy, int width) { 13979de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com asm volatile ( 1398c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 13999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "1: \n" 14000bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 14019de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld2.8 {d1, d3}, [%0]! 
\n" // load 16 Ys 14020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 14039de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d0}, [%1]! \n" // load 8 Us 14040bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 14059de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d2}, [%2]! \n" // load 8 Vs 14069de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels 14070bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 14082c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels. 14099de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "bgt 1b \n" 14103e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_y), // %0 14113e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_u), // %1 14123e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_v), // %2 14133e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_uyvy), // %3 14143e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %4 14153e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 14163e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" 14179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com ); 14189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com} 14199de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 14201bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) { 14211bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com asm volatile ( 1422c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 14231bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "1: \n" 14240bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com 
MEMACCESS(0) 142511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 14261bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 142711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTORGB565 14280bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 14291bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565. 14301bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "bgt 1b \n" 14311bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : "+r"(src_argb), // %0 14321bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(dst_rgb565), // %1 14331bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(pix) // %2 14341bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : 14358f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q8", "q9", "q10", "q11" 14361bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com ); 14371bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com} 14381bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com 14391bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, 14401bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com int pix) { 14411bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com asm volatile ( 1442c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 14431bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "1: \n" 14440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 144511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 
14461bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 144711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB1555 14480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 14491bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555. 14501bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "bgt 1b \n" 14511bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : "+r"(src_argb), // %0 14521bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(dst_argb1555), // %1 14531bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(pix) // %2 14541bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : 14558f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q8", "q9", "q10", "q11" 14561bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com ); 14571bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com} 14581bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com 1459bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.comvoid ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, 1460bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.com int pix) { 1461c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com asm volatile ( 1462c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. 1463c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1464c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "1: \n" 14650bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 146611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 1467c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
146811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB4444 14690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1470c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. 1471c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "bgt 1b \n" 1472bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_argb), // %0 1473c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "+r"(dst_argb4444), // %1 1474bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 1475c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com : 14768f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q8", "q9", "q10", "q11" 1477c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com ); 1478c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com} 14790908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com 14800908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.comvoid ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { 14810908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com asm volatile ( 1482bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 1483bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 1484bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 1485bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 1486c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1487bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 14880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 14891dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! 
\n" // load 8 ARGB pixels. 1490bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 1491bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 1492bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 1493bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 1494bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 1495bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d27 \n" 14960bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1497bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 1498bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 1499bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_argb), // %0 1500bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 1501bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 1502bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 15038f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 1504bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 1505bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 1506bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1507cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.comvoid ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { 1508cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com asm volatile ( 1509050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient 1510050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient 
1511050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient 1512c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1513cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "1: \n" 15140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1515cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 1516cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 1517cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 1518cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 1519cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 1520050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y 15210bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1522cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 1523cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "bgt 1b \n" 1524cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com : "+r"(src_argb), // %0 1525cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "+r"(dst_y), // %1 1526cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "+r"(pix) // %2 1527cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com : 1528cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 1529cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com ); 1530cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com} 1531cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 1532c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 8x1 pixels. 
1533c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.comvoid ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1534c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com int pix) { 1535c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com asm volatile ( 1536c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient 1537c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient 1538c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient 1539c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient 1540c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient 1541c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1542c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1543c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "1: \n" 15440bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1545c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 1546c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 
1547c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 1548c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q2, d1, d25 \n" // G 1549c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q2, d2, d26 \n" // R 1550c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned 1551c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 1552c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmull.u8 q3, d2, d24 \n" // R 1553c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q3, d1, d28 \n" // G 1554c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q3, d0, d27 \n" // B 1555c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned 1556c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 1557c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U 1558c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V 1559c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 15600bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1561c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. 15620bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 1563c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
1564c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "bgt 1b \n" 1565c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com : "+r"(src_argb), // %0 1566c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "+r"(dst_u), // %1 1567c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "+r"(dst_v), // %2 1568c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "+r"(pix) // %3 1569c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com : 15708f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" 1571c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com ); 1572c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com} 1573c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1574c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 1575c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.comvoid ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1576c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com int pix) { 1577c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com asm volatile ( 1578c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 1579c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 1580c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 1581c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 158276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 1583c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 
1584c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1585c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "1: \n" 15860bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1587c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 15880bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1589c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 1590c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1591c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 1592c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 1593c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 1594c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1595c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop. 
1596c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmul.s16 q8, q0, q10 \n" // B 1597c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q8, q1, q11 \n" // G 1598c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q8, q2, q12 \n" // R 1599c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 1600c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1601c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmul.s16 q9, q2, q10 \n" // R 1602c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q9, q1, q14 \n" // G 1603c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q9, q0, q13 \n" // B 1604c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 1605c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1606c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 1607c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 1608c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 16090bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1610c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. 16110bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 1612c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
1613c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "bgt 1b \n" 1614c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com : "+r"(src_argb), // %0 1615c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "+r"(dst_u), // %1 1616c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "+r"(dst_v), // %2 1617c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "+r"(pix) // %3 1618c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com : 16198f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 1620c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 1621c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com ); 1622c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com} 1623c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 162476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32. 
162576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.comvoid ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 162676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com int pix) { 162776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com asm volatile ( 162806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 162906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 163006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 163106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 163206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 163376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1634c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 163576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "1: \n" 16360bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 163776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 16380bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 163976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 164076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 164176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 164276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 
16430bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 164476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels. 16450bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 164676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels. 164776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts. 164876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts. 164976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts. 165006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 165176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts. 165276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d1, d8, d9 \n" // B 165376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts. 165476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d3, d10, d11 \n" // G 165576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts. 
165676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d5, d12, d13 \n" // R 165706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 165806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 165906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 166006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 166106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 166276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "subs %3, %3, #32 \n" // 32 processed per loop. 166376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmul.s16 q8, q0, q10 \n" // B 166476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q8, q1, q11 \n" // G 166576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q8, q2, q12 \n" // R 166676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 166776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmul.s16 q9, q2, q10 \n" // R 166876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q9, q1, q14 \n" // G 166976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q9, q0, q13 \n" // B 167076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 167176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 167276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 16730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 167476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. 
16750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 167676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 167776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "bgt 1b \n" 167876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : "+r"(src_argb), // %0 167976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "+r"(dst_u), // %1 168076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "+r"(dst_v), // %2 168176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "+r"(pix) // %3 168276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : 16838f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 168476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 168576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com ); 168676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com} 168776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com 1688dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
// RGBTOUV(QB, QG, QR): asm fragment turning 8 16-bit B/G/R values into U and V.
//   Inputs : QB, QG, QR name the q registers holding the channel values;
//            q10..q14 hold the UB/VR, UG, UR, VB, VG coefficients and q15
//            the rounding/bias constant (all set up by the caller).
//   Outputs: d0 = 8 U bytes, d1 = 8 V bytes.
//   Scratch: q8 and q9.
#define RGBTOUV(QB, QG, QR) \
    "vmul.s16 q8, " #QB ", q10 \n" /* U: + B */ \
    "vmls.s16 q8, " #QG ", q11 \n" /* U: - G */ \
    "vmls.s16 q8, " #QR ", q12 \n" /* U: - R */ \
    "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
    "vmul.s16 q9, " #QR ", q10 \n" /* V: + R */ \
    "vmls.s16 q9, " #QG ", q14 \n" /* V: - G */ \
    "vmls.s16 q9, " #QB ", q13 \n" /* V: - B */ \
    "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
    "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \
    "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */

// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
1702dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.comvoid ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, 1703dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 170476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com asm volatile ( 1705dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 170606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 170706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 170806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 170906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 171006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 171176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1712c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 171376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "1: \n" 17140bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1715dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 17160bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1717dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 1718dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 1719dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 
1720dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 17210bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1722dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. 17230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1724dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. 1725dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 1726dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 1727dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 172806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 172906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 173006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 173106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 173206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 1733dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 173495730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q0, q1, q2) 17350bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 1736dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 17370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 1738dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
173976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "bgt 1b \n" 174076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : "+r"(src_argb), // %0 1741dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(src_stride_argb), // %1 1742dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(dst_u), // %2 1743dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(dst_v), // %3 1744dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(pix) // %4 174576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : 17468f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 174776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 174876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com ); 174976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com} 175095730719503137a7db61a105bec02220f9ed159efbarchard@google.com 1751050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// TODO(fbarchard): Subsample match C code. 
1752050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.comvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, 1753050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1754050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com asm volatile ( 1755050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 175606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient 175706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient 175806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient 175906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient 176006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient 1761050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1762c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1763050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "1: \n" 17640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1765050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 17660bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 1767050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 1768050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 1769050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 
1770050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 17710bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1772050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. 17730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 1774050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. 1775050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 1776050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 1777050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 177806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 177906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 178006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 178106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 178206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 1783050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 1784050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com RGBTOUV(q0, q1, q2) 17850bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 1786050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 17870bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 1788050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
// bgt 1b: loop back to local label 1 while the flags read "greater than".
    "bgt 1b \n"
  : "+r"(src_argb),         // %0
    "+r"(src_stride_argb),  // %1
    "+r"(dst_u),            // %2
    "+r"(dst_v),            // %3
    "+r"(pix)               // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}

// Reads two rows of BGRA pixels and writes one row of subsampled U and V.
// Each loop iteration loads 16 pixels from src_bgra and 16 pixels from the
// row src_stride_bgra bytes after it, sums each 2x2 block per channel, and
// stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is the pixel count of one source row. It is decremented by 16 per
// iteration and the loop repeats while the result is greater than zero; no
// remainder handling is visible here, so callers presumably pass a positive
// multiple of 16 (TODO confirm against the callers).
// MEMACCESS and RGBTOUV are macros defined outside this function.
void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
                      uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_bgra + src_stride_bgra (second row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5
    ".p2align 2 \n"
  "1: \n"
    // First row: two de-interleaving loads of 8 pixels each.
    MEMACCESS(0)
    "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
    MEMACCESS(0)
    "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
    // Pairwise add: horizontally adjacent bytes -> one 16 bit sum.
    "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
    // Second row: same loads into q4-q7, then pairwise add-accumulate so
    // each 16 bit lane holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
    MEMACCESS(1)
    "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
    "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts, accumulated.

    "vrshr.u16 q1, q1, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q2, q2, #1 \n"
    "vrshr.u16 q3, q3, #1 \n"

    // 16 pixels per row (32 source pixels over both rows) per loop.
    "subs %4, %4, #16 \n"
    // Arguments are the B, G, R sums; the result is stored from d0 (U) and
    // d1 (V) below. The macro body is not visible here.
    RGBTOUV(q3, q2, q1)
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_bgra),         // %0
    "+r"(src_stride_bgra),  // %1
    "+r"(dst_u),            // %2
    "+r"(dst_v),            // %3
    "+r"(pix)               // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
// Reads two rows of ABGR pixels and writes one row of subsampled U and V.
// Each loop iteration loads 16 pixels from src_abgr and 16 pixels from the
// row src_stride_abgr bytes after it, sums each 2x2 block per channel, and
// stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is the pixel count of one source row. It is decremented by 16 per
// iteration and the loop repeats while the result is greater than zero; no
// remainder handling is visible here, so callers presumably pass a positive
// multiple of 16 (TODO confirm against the callers).
// MEMACCESS and RGBTOUV are macros defined outside this function.
void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
                      uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_abgr + src_stride_abgr (second row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5
    ".p2align 2 \n"
  "1: \n"
    // First row: two de-interleaving loads of 8 pixels each.
    MEMACCESS(0)
    "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
    MEMACCESS(0)
    "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
    // Pairwise add: horizontally adjacent bytes -> one 16 bit sum.
    "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
    // Second row: same loads into q4-q7, then pairwise add-accumulate so
    // each 16 bit lane holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
    MEMACCESS(1)
    "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
    "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts, accumulated.

    "vrshr.u16 q0, q0, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q1, q1, #1 \n"
    "vrshr.u16 q2, q2, #1 \n"

    // 16 pixels per row (32 source pixels over both rows) per loop.
    "subs %4, %4, #16 \n"
    // Arguments are the B, G, R sums; the result is stored from d0 (U) and
    // d1 (V) below. The macro body is not visible here.
    RGBTOUV(q2, q1, q0)
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_abgr),         // %0
    "+r"(src_stride_abgr),  // %1
    "+r"(dst_u),            // %2
    "+r"(dst_v),            // %3
    "+r"(pix)               // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}

// Reads two rows of RGBA pixels and writes one row of subsampled U and V.
// Each loop iteration loads 16 pixels from src_rgba and 16 pixels from the
// row src_stride_rgba bytes after it, sums each 2x2 block per channel, and
// stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is the pixel count of one source row. It is decremented by 16 per
// iteration and the loop repeats while the result is greater than zero; no
// remainder handling is visible here, so callers presumably pass a positive
// multiple of 16 (TODO confirm against the callers).
// MEMACCESS and RGBTOUV are macros defined outside this function.
void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
                      uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_rgba + src_stride_rgba (second row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5
    ".p2align 2 \n"
  "1: \n"
    // First row: two de-interleaving loads of 8 pixels each.
    MEMACCESS(0)
    "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
    MEMACCESS(0)
    "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
    // Pairwise add into the next-lower register: the sums move from q1-q3
    // down to q0-q2, overwriting the first byte plane that was loaded in q0.
    "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts.
    // Second row: same loads into q4-q7, then pairwise add-accumulate so
    // each 16 bit lane holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
    MEMACCESS(1)
    "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
    "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts, accumulated.

    "vrshr.u16 q0, q0, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q1, q1, #1 \n"
    "vrshr.u16 q2, q2, #1 \n"

    // 16 pixels per row (32 source pixels over both rows) per loop.
    "subs %4, %4, #16 \n"
    // Arguments are the B, G, R sums; the result is stored from d0 (U) and
    // d1 (V) below. The macro body is not visible here.
    RGBTOUV(q0, q1, q2)
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_rgba),         // %0
    "+r"(src_stride_rgba),  // %1
    "+r"(dst_u),            // %2
    "+r"(dst_v),            // %3
    "+r"(pix)               // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
// Reads two rows of RGB24 pixels (3 bytes each) and writes one row of
// subsampled U and V.
// Each loop iteration loads 16 pixels from src_rgb24 and 16 pixels from the
// row src_stride_rgb24 bytes after it, sums each 2x2 block per channel, and
// stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is the pixel count of one source row. It is decremented by 16 per
// iteration and the loop repeats while the result is greater than zero; no
// remainder handling is visible here, so callers presumably pass a positive
// multiple of 16 (TODO confirm against the callers).
// MEMACCESS and RGBTOUV are macros defined outside this function.
void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
                       uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_rgb24 + src_stride_rgb24 (second row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5
    ".p2align 2 \n"
  "1: \n"
    // First row: two de-interleaving 3-channel loads of 8 pixels each.
    MEMACCESS(0)
    "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels.
    MEMACCESS(0)
    "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels.
    // Pairwise add: horizontally adjacent bytes -> one 16 bit sum.
    "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
    // Second row: same loads into q4-q6, then pairwise add-accumulate so
    // each 16 bit lane holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels.
    MEMACCESS(1)
    "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels.
    "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts, accumulated.

    "vrshr.u16 q0, q0, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q1, q1, #1 \n"
    "vrshr.u16 q2, q2, #1 \n"

    // 16 pixels per row (32 source pixels over both rows) per loop.
    "subs %4, %4, #16 \n"
    // Arguments are the B, G, R sums; the result is stored from d0 (U) and
    // d1 (V) below. The macro body is not visible here.
    RGBTOUV(q0, q1, q2)
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_rgb24),         // %0
    "+r"(src_stride_rgb24),  // %1
    "+r"(dst_u),             // %2
    "+r"(dst_v),             // %3
    "+r"(pix)                // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}

// Reads two rows of RAW pixels (3 bytes each) and writes one row of
// subsampled U and V. Same steps as the RGB24 variant, but the register
// planes labelled B and R are swapped (R in q0, B in q2).
// Each loop iteration loads 16 pixels from src_raw and 16 pixels from the
// row src_stride_raw bytes after it, sums each 2x2 block per channel, and
// stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is the pixel count of one source row. It is decremented by 16 per
// iteration and the loop repeats while the result is greater than zero; no
// remainder handling is visible here, so callers presumably pass a positive
// multiple of 16 (TODO confirm against the callers).
// MEMACCESS and RGBTOUV are macros defined outside this function.
void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
                     uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_raw + src_stride_raw (second row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5
    ".p2align 2 \n"
  "1: \n"
    // First row: two de-interleaving 3-channel loads of 8 pixels each.
    MEMACCESS(0)
    "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels.
    MEMACCESS(0)
    "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels.
    // Pairwise add: horizontally adjacent bytes -> one 16 bit sum.
    "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
    "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
    "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
    // Second row: same loads into q4-q6, then pairwise add-accumulate so
    // each 16 bit lane holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels.
    MEMACCESS(1)
    "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels.
    "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts, accumulated.
    "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts, accumulated.

    "vrshr.u16 q0, q0, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q1, q1, #1 \n"
    "vrshr.u16 q2, q2, #1 \n"

    // 16 pixels per row (32 source pixels over both rows) per loop.
    "subs %4, %4, #16 \n"
    // Arguments are the B, G, R sums; the result is stored from d0 (U) and
    // d1 (V) below. The macro body is not visible here.
    RGBTOUV(q2, q1, q0)
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_raw),         // %0
    "+r"(src_stride_raw),  // %1
    "+r"(dst_u),           // %2
    "+r"(dst_v),           // %3
    "+r"(pix)              // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}
// 16x2 pixels -> 8x1. pix is number of RGB565 pixels per row. e.g. 16.
// Reads two rows of RGB565 pixels (2 bytes each) and writes one row of
// subsampled U and V. Each loop iteration loads 16 pixels from src_rgb565
// and 16 pixels from the row src_stride_rgb565 bytes after it, 8 pixels at
// a time, expands them with RGB565TOARGB, sums each 2x2 block per channel
// and stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix is decremented by 16 per iteration and the loop repeats while the
// result is greater than zero; no remainder handling is visible here, so
// callers presumably pass a positive multiple of 16 (TODO confirm).
// MEMACCESS and RGB565TOARGB are macros defined outside this function; the
// code below treats d0, d1, d2 as the B, G, R bytes after RGB565TOARGB.
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
                        uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_rgb565 + src_stride_rgb565 (second row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5 with 8 fraction bits (see >> 8 below)
    ".p2align 2 \n"
  "1: \n"
    // First row, pixels 0-7: pairwise sums go to the low halves of q4-q6.
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
    RGB565TOARGB
    "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
    // First row, pixels 8-15: pairwise sums go to the high halves of q4-q6.
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels.
    RGB565TOARGB
    "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.

    // Second row: pairwise add-accumulate onto the first-row sums, so each
    // 16 bit lane of q4-q6 holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels.
    RGB565TOARGB
    "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts, accumulated.
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels.
    RGB565TOARGB
    "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts, accumulated.

    "vrshr.u16 q4, q4, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q5, q5, #1 \n"
    "vrshr.u16 q6, q6, #1 \n"

    "subs %4, %4, #16 \n" // 16 processed per loop.
    // U = (B * q10 - G * q11 - R * q12 + q15) >> 8
    "vmul.s16 q8, q4, q10 \n" // B
    "vmls.s16 q8, q5, q11 \n" // G
    "vmls.s16 q8, q6, q12 \n" // R
    "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
    // V = (R * q10 - G * q14 - B * q13 + q15) >> 8
    "vmul.s16 q9, q6, q10 \n" // R
    "vmls.s16 q9, q5, q14 \n" // G
    "vmls.s16 q9, q4, q13 \n" // B
    "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
    "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
    "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_rgb565),         // %0
    "+r"(src_stride_rgb565),  // %1
    "+r"(dst_u),              // %2
    "+r"(dst_v),              // %3
    "+r"(pix)                 // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}

// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
// Reads two rows of ARGB1555 pixels (2 bytes each) and writes one row of
// subsampled U and V. Each loop iteration loads 16 pixels from
// src_argb1555 and 16 pixels from the row src_stride_argb1555 bytes after
// it, 8 pixels at a time, expands them with RGB555TOARGB, sums each 2x2
// block per channel and stores 8 bytes to dst_u and 8 bytes to dst_v.
// pix counts ARGB1555 pixels in one source row; it is decremented by 16 per
// iteration and the loop repeats while the result is greater than zero. No
// remainder handling is visible here, so callers presumably pass a positive
// multiple of 16 (TODO confirm).
// MEMACCESS and RGB555TOARGB are macros defined outside this function; the
// code below treats d0, d1, d2 as the B, G, R bytes after RGB555TOARGB.
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
                          uint8* dst_u, uint8* dst_v, int pix) {
  asm volatile (
    "add %1, %0, %1 \n" // %1 = src_argb1555 + src_stride_argb1555 (2nd row)
    // Coefficients are written as "/ 2" because the channel sums below are
    // left at 2x the 2x2 average.
    "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
    "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
    "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
    "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
    "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
    "vmov.u16 q15, #0x8080 \n" // 128.5 with 8 fraction bits (see >> 8 below)
    ".p2align 2 \n"
  "1: \n"
    // First row, pixels 0-7: pairwise sums go to the low halves of q4-q6.
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts.
    // First row, pixels 8-15: pairwise sums go to the high halves of q4-q6.
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts.
    "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts.
    "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts.

    // Second row: pairwise add-accumulate onto the first-row sums, so each
    // 16 bit lane of q4-q6 holds the sum of a 2x2 block.
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts, accumulated.
    MEMACCESS(1)
    "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels.
    RGB555TOARGB
    "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts, accumulated.
    "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts, accumulated.

    "vrshr.u16 q4, q4, #1 \n" // 2x average (rounded sum of 4 / 2)
    "vrshr.u16 q5, q5, #1 \n"
    "vrshr.u16 q6, q6, #1 \n"

    "subs %4, %4, #16 \n" // 16 processed per loop.
    // U = (B * q10 - G * q11 - R * q12 + q15) >> 8
    "vmul.s16 q8, q4, q10 \n" // B
    "vmls.s16 q8, q5, q11 \n" // G
    "vmls.s16 q8, q6, q12 \n" // R
    "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
    // V = (R * q10 - G * q14 - B * q13 + q15) >> 8
    "vmul.s16 q9, q6, q10 \n" // R
    "vmls.s16 q9, q5, q14 \n" // G
    "vmls.s16 q9, q4, q13 \n" // B
    "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
    "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
    "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
    MEMACCESS(3)
    "vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
    "bgt 1b \n" // flags come from the subs above
  : "+r"(src_argb1555),         // %0
    "+r"(src_stride_argb1555),  // %1
    "+r"(dst_u),                // %2
    "+r"(dst_v),                // %3
    "+r"(pix)                   // %4
  :
  : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
  );
}

// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16.
2187522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, 2188522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 2189522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com asm volatile ( 2190522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 219106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 219206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 219306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 219406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 219506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 2196522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 2197c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2198522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "1: \n" 21990bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2200522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 2201522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 2202522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 2203522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 2204522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 
22050bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2206522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. 2207522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 2208522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 2209522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 2210522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 2211522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 22120bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2213522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. 2214522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 2215522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 2216522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 2217522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 22180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2219522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. 2220522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 2221522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 2222522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 2223522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 
2224522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 222506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q4, q4, #1 \n" // 2x average 222606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q5, q5, #1 \n" 222706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q6, q6, #1 \n" 222806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 2229522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "subs %4, %4, #16 \n" // 16 processed per loop. 2230522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmul.s16 q8, q4, q10 \n" // B 2231522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q8, q5, q11 \n" // G 2232522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q8, q6, q12 \n" // R 2233522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 2234522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmul.s16 q9, q6, q10 \n" // R 2235522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q9, q5, q14 \n" // G 2236522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q9, q4, q13 \n" // B 2237522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 2238522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 2239522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 22400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 2241522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 22420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 2243522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
2244522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "bgt 1b \n" 2245522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com : "+r"(src_argb4444), // %0 2246522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(src_stride_argb4444), // %1 2247522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(dst_u), // %2 2248522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(dst_v), // %3 2249522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(pix) // %4 2250522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com : 22518f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 2252522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 2253522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ); 2254522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com} 2255f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com 2256bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { 2257bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2258bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 2259bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 2260bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 2261bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 2262c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2263bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 22640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 22651dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld1.8 
{q0}, [%0]! \n" // load 8 RGB565 pixels. 2266bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2267bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com RGB565TOARGB 2268bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 2269bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 2270bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 2271bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 2272bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d27 \n" 22730bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2274bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 2275bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2276bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgb565), // %0 2277bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2278bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2279bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 22808f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 2281bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2282bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2283bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 22841dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) { 22851dee6250936424ced8722329369da75935d61580fbarchard@google.com asm volatile ( 22861dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 
22871dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 22881dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 22891dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 2290c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 22911dee6250936424ced8722329369da75935d61580fbarchard@google.com "1: \n" 22920bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 22931dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. 22941dee6250936424ced8722329369da75935d61580fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 22951dee6250936424ced8722329369da75935d61580fbarchard@google.com ARGB1555TOARGB 22961dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 22971dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 22981dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 22991dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 23001dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqadd.u8 d0, d27 \n" 23010bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 23021dee6250936424ced8722329369da75935d61580fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
23031dee6250936424ced8722329369da75935d61580fbarchard@google.com "bgt 1b \n" 23041dee6250936424ced8722329369da75935d61580fbarchard@google.com : "+r"(src_argb1555), // %0 23051dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(dst_y), // %1 23061dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(pix) // %2 23071dee6250936424ced8722329369da75935d61580fbarchard@google.com : 23088f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 23091dee6250936424ced8722329369da75935d61580fbarchard@google.com ); 23101dee6250936424ced8722329369da75935d61580fbarchard@google.com} 23111dee6250936424ced8722329369da75935d61580fbarchard@google.com 23121dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) { 23131dee6250936424ced8722329369da75935d61580fbarchard@google.com asm volatile ( 23141dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 23151dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 23161dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 23171dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 2318c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 23191dee6250936424ced8722329369da75935d61580fbarchard@google.com "1: \n" 23200bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 23211dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 23221dee6250936424ced8722329369da75935d61580fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
23231dee6250936424ced8722329369da75935d61580fbarchard@google.com ARGB4444TOARGB 23241dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 23251dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 23261dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 23271dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 23281dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqadd.u8 d0, d27 \n" 23290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 23301dee6250936424ced8722329369da75935d61580fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 23311dee6250936424ced8722329369da75935d61580fbarchard@google.com "bgt 1b \n" 23321dee6250936424ced8722329369da75935d61580fbarchard@google.com : "+r"(src_argb4444), // %0 23331dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(dst_y), // %1 23341dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(pix) // %2 23351dee6250936424ced8722329369da75935d61580fbarchard@google.com : 23368f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 23371dee6250936424ced8722329369da75935d61580fbarchard@google.com ); 23381dee6250936424ced8722329369da75935d61580fbarchard@google.com} 2339bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2340bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { 2341bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2342bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 2343bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2344bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #13 
\n" // B * 0.1016 coefficient 2345bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2346c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2347bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 23480bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2349bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. 2350bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2351bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d1, d4 \n" // R 2352bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d5 \n" // G 2353bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d3, d6 \n" // B 2354bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2355bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 23560bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2357bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
2358bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2359bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_bgra), // %0 2360bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2361bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2362bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 23638f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2364bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2365bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2366bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2367bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { 2368bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2369bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 2370bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2371bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 2372bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2373c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2374bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 23750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2376bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. 2377bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
2378bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d0, d4 \n" // R 2379bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d1, d5 \n" // G 2380bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d6 \n" // B 2381bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2382bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 23830bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2384bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 2385bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2386bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_abgr), // %0 2387bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2388bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2389bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 23908f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2391bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2392bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2393bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2394bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { 2395bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 23960908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient 23970908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 23980908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient 
23990908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2400c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 24010908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "1: \n" 24020bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2403bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. 2404bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2405bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d1, d4 \n" // B 2406bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d5 \n" // G 2407bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d3, d6 \n" // R 2408bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2409bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 24100bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2411bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
2412bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2413bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgba), // %0 2414bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2415bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2416bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 24178f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2418bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2419bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2420bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2421bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { 2422bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2423bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient 2424bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2425bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient 2426bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2427c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2428bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 24290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2430bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. 24310908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
24320908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmull.u8 q8, d0, d4 \n" // B 24330908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmlal.u8 q8, d1, d5 \n" // G 24340908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmlal.u8 q8, d2, d6 \n" // R 24350908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 24360908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vqadd.u8 d0, d7 \n" 24370bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 24380908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 24390908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "bgt 1b \n" 2440bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgb24), // %0 24410908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "+r"(dst_y), // %1 24420908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "+r"(pix) // %2 24430908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com : 24448f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 24450908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com ); 24460908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com} 2447bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2448bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { 2449bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2450bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 2451bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2452bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 
2453bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2454c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2455bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 24560bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2457bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. 2458bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2459bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d0, d4 \n" // B 2460bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d1, d5 \n" // G 2461bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d6 \n" // R 2462bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2463bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 24640bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2465bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
2466bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2467bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_raw), // %0 2468bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2469bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2470bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 24718f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2472bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2473bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 24740908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com 2475b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1 2476b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_NEON(uint8* dst_ptr, 2477b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com const uint8* src_ptr, ptrdiff_t src_stride, 2478b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com int dst_width, int source_y_fraction) { 2479b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com asm volatile ( 24808f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #0 \n" 24818f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 100f \n" 24828f506332af217882648eed166a257557855b9fdbfbarchard@google.com "add %2, %1 \n" 24838f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #64 \n" 24848f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 75f \n" 24858f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #128 \n" 24868f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 50f \n" 24878f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #192 \n" 24888f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 25f \n" 
24898f506332af217882648eed166a257557855b9fdbfbarchard@google.com 24908f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vdup.8 d5, %4 \n" 24918f506332af217882648eed166a257557855b9fdbfbarchard@google.com "rsb %4, #256 \n" 24928f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vdup.8 d4, %4 \n" 2493b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // General purpose row blend. 2494b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "1: \n" 24950bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 24962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 24970bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 24982c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 2499b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 25008f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmull.u8 q13, d0, d4 \n" 25018f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmull.u8 q14, d1, d4 \n" 25028f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmlal.u8 q13, d2, d5 \n" 25038f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmlal.u8 q14, d3, d5 \n" 25048f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrshrn.u16 d0, q13, #8 \n" 25058f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrshrn.u16 d1, q14, #8 \n" 25060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 25072c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 25088f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 1b \n" 25098f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2510b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2511b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 25 / 75. 
2512b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "25: \n" 25130bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 25142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 25150bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 25162c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 2517b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 25188f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 25198f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 25200bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 25212c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 25228f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 25b \n" 25238f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2524b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2525b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 50 / 50. 2526b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "50: \n" 25270bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 25282c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 25290bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 25302c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 2531b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 25328f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 25330bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 25342c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" 25358f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 50b \n" 25368f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2537b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2538b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 75 / 25. 2539b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "75: \n" 25400bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 25412c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! \n" 25420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 25432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%2]! \n" 2544b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 25458f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 25468f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 25470bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 25482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 25498f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 75b \n" 25508f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2551b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2552b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 100 / 0 - Copy row unchanged. 2553b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "100: \n" 25540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 25552c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 2556b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 25570bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 25582c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" 25598f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 100b \n" 2560b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2561b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "99: \n" 2562b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com : "+r"(dst_ptr), // %0 2563b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(src_ptr), // %1 2564b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(src_stride), // %2 2565b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(dst_width), // %3 2566b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(source_y_fraction) // %4 2567b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com : 25688f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" 2569b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com ); 2570b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com} 25718f506332af217882648eed166a257557855b9fdbfbarchard@google.com 25728f506332af217882648eed166a257557855b9fdbfbarchard@google.com// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr 25738f506332af217882648eed166a257557855b9fdbfbarchard@google.comvoid ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 25748f506332af217882648eed166a257557855b9fdbfbarchard@google.com uint8* dst_argb, int width) { 25758f506332af217882648eed166a257557855b9fdbfbarchard@google.com asm volatile ( 25768f506332af217882648eed166a257557855b9fdbfbarchard@google.com "subs %3, #8 \n" 25778f506332af217882648eed166a257557855b9fdbfbarchard@google.com "blt 89f \n" 25788f506332af217882648eed166a257557855b9fdbfbarchard@google.com // Blend 8 pixels. 25798f506332af217882648eed166a257557855b9fdbfbarchard@google.com "8: \n" 25800bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 25818f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! 
\n" // load 8 pixels of ARGB0. 25820bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 25838f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. 25848f506332af217882648eed166a257557855b9fdbfbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 2585d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q10, d4, d3 \n" // db * a 2586d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q11, d5, d3 \n" // dg * a 2587d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q12, d6, d3 \n" // dr * a 2588d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 2589d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 2590d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 2591d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 2592d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 2593d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 q0, q0, q2 \n" // + sbg 2594d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 d2, d2, d6 \n" // + sr 25958f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmov.u8 d3, #255 \n" // a = 255 25960bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 25978f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. 
25988f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bge 8b \n" 25998f506332af217882648eed166a257557855b9fdbfbarchard@google.com 26008f506332af217882648eed166a257557855b9fdbfbarchard@google.com "89: \n" 26018f506332af217882648eed166a257557855b9fdbfbarchard@google.com "adds %3, #8-1 \n" 26028f506332af217882648eed166a257557855b9fdbfbarchard@google.com "blt 99f \n" 26038f506332af217882648eed166a257557855b9fdbfbarchard@google.com 26048f506332af217882648eed166a257557855b9fdbfbarchard@google.com // Blend 1 pixels. 26058f506332af217882648eed166a257557855b9fdbfbarchard@google.com "1: \n" 26060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 26078f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. 26080bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 26098f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. 26108f506332af217882648eed166a257557855b9fdbfbarchard@google.com "subs %3, %3, #1 \n" // 1 processed per loop. 
2611d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q10, d4, d3 \n" // db * a 2612d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q11, d5, d3 \n" // dg * a 2613d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q12, d6, d3 \n" // dr * a 2614d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 2615d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 2616d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 2617d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 2618d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 2619d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 q0, q0, q2 \n" // + sbg 2620d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 d2, d2, d6 \n" // + sr 26218f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmov.u8 d3, #255 \n" // a = 255 26220bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(2) 26238f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. 
26248f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bge 1b \n" 26258f506332af217882648eed166a257557855b9fdbfbarchard@google.com 26268f506332af217882648eed166a257557855b9fdbfbarchard@google.com "99: \n" 26278f506332af217882648eed166a257557855b9fdbfbarchard@google.com 26288f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "+r"(src_argb0), // %0 26298f506332af217882648eed166a257557855b9fdbfbarchard@google.com "+r"(src_argb1), // %1 26308f506332af217882648eed166a257557855b9fdbfbarchard@google.com "+r"(dst_argb), // %2 26318f506332af217882648eed166a257557855b9fdbfbarchard@google.com "+r"(width) // %3 26328f506332af217882648eed166a257557855b9fdbfbarchard@google.com : 2633d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12" 26348f506332af217882648eed166a257557855b9fdbfbarchard@google.com ); 26358f506332af217882648eed166a257557855b9fdbfbarchard@google.com} 26368f506332af217882648eed166a257557855b9fdbfbarchard@google.com 26371d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com// Attenuate 8 pixels at a time. 26381d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.comvoid ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 26391d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com asm volatile ( 26401d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com // Attenuate 8 pixels. 26411d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "1: \n" 26420bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 26431d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. 26441d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
26451d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vmull.u8 q10, d0, d3 \n" // b * a 26461d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vmull.u8 q11, d1, d3 \n" // g * a 26471d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vmull.u8 q12, d2, d3 \n" // r * a 26481d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 26491d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 26501d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 26510bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 26521d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 26531d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "bgt 1b \n" 26541d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com : "+r"(src_argb), // %0 26551d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "+r"(dst_argb), // %1 26561d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "+r"(width) // %2 26571d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com : 26581d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com : "cc", "memory", "q0", "q1", "q10", "q11", "q12" 26591d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com ); 26601d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com} 26611d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com 2662ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// Quantize 8 ARGB pixels (32 bytes). 
2663ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// dst = (dst * scale >> 16) * interval_size + interval_offset; 2664ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.comvoid ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, 2665ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com int interval_offset, int width) { 2666ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com asm volatile ( 2667ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vdup.u16 q8, %2 \n" 2668ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vshr.u16 q8, q8, #1 \n" // scale >>= 1 2669ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vdup.u16 q9, %3 \n" // interval multiply. 2670ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vdup.u16 q10, %4 \n" // interval add 2671ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com 2672ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com // 8 pixel loop. 2673ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com ".p2align 2 \n" 2674ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "1: \n" 26750bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2676ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. 2677ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "subs %1, %1, #8 \n" // 8 processed per loop. 2678ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmovl.u8 q0, d0 \n" // b (0 .. 
255) 2679ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmovl.u8 q1, d2 \n" 2680ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmovl.u8 q2, d4 \n" 2681ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqdmulh.s16 q0, q0, q8 \n" // b * scale 2682ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqdmulh.s16 q1, q1, q8 \n" // g 2683ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqdmulh.s16 q2, q2, q8 \n" // r 2684ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmul.u16 q0, q0, q9 \n" // b * interval_size 2685ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmul.u16 q1, q1, q9 \n" // g 2686ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmul.u16 q2, q2, q9 \n" // r 2687ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vadd.u16 q0, q0, q10 \n" // b + interval_offset 2688ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vadd.u16 q1, q1, q10 \n" // g 2689ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vadd.u16 q2, q2, q10 \n" // r 2690ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqmovn.u16 d0, q0 \n" 2691ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqmovn.u16 d2, q1 \n" 2692ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqmovn.u16 d4, q2 \n" 26930bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2694ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB. 
2695ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "bgt 1b \n" 2696ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com : "+r"(dst_argb), // %0 2697ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "+r"(width) // %1 2698ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com : "r"(scale), // %2 2699ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "r"(interval_size), // %3 2700ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "r"(interval_offset) // %4 2701b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10" 2702b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com ); 2703b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com} 2704b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 2705b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// Shade 8 pixels at a time by specified value. 2706b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. 2707fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. 2708b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.comvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, 2709b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com uint32 value) { 2710b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com asm volatile ( 2711b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vdup.u32 q0, %3 \n" // duplicate scale value. 2712fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vzip.u8 d0, d1 \n" // d0 aarrggbb. 2713fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vshr.u16 q0, q0, #1 \n" // scale / 2. 2714b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 2715b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com // 8 pixel loop. 
2716b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com ".p2align 2 \n" 2717b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "1: \n" 27180bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2719b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB. 2720b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2721b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q10, d20 \n" // b (0 .. 255) 2722b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q11, d22 \n" 2723b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q12, d24 \n" 2724b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q13, d26 \n" 2725578c88a9f7114b3ede887b3c6d9a11d8d06b043bfbarchard@google.com "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2 2726fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vqrdmulh.s16 q11, q11, d0[1] \n" // g 2727fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vqrdmulh.s16 q12, q12, d0[2] \n" // r 2728fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vqrdmulh.s16 q13, q13, d0[3] \n" // a 2729b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d20, q10 \n" 2730b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d22, q11 \n" 2731b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d24, q12 \n" 2732b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d26, q13 \n" 27330bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2734b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB. 
2735b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "bgt 1b \n" 2736b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "+r"(src_argb), // %0 2737b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "+r"(dst_argb), // %1 2738b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "+r"(width) // %2 2739b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "r"(value) // %3 2740b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "cc", "memory", "q0", "q10", "q11", "q12", "q13" 2741ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com ); 2742ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com} 2743ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com 274482375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels 2745050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// Similar to ARGBToYJ but stores ARGB. 2746050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; 274782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.comvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 274882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com asm volatile ( 2749050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient 2750050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient 2751050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient 2752c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 275382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "1: \n" 27540bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 275582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 
275682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 275782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 275882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 275982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 2760050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B 276182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmov d1, d0 \n" // G 276282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmov d2, d0 \n" // R 27630bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 276482375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels. 276582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "bgt 1b \n" 276682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com : "+r"(src_argb), // %0 276782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "+r"(dst_argb), // %1 276882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "+r"(width) // %2 276982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com : 277082375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 277182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com ); 277282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com} 277382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com 2774c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
2775c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// b = (r * 35 + g * 68 + b * 17) >> 7 2776c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// g = (r * 45 + g * 88 + b * 22) >> 7 2777c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// r = (r * 50 + g * 98 + b * 24) >> 7 2778c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.comvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) { 2779c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com asm volatile ( 2780c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d20, #17 \n" // BB coefficient 2781c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d21, #68 \n" // BG coefficient 2782c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d22, #35 \n" // BR coefficient 2783c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d24, #22 \n" // GB coefficient 2784c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d25, #88 \n" // GG coefficient 2785c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d26, #45 \n" // GR coefficient 2786c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d28, #24 \n" // BB coefficient 2787c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d29, #98 \n" // BG coefficient 2788c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d30, #50 \n" // BR coefficient 2789c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2790c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "1: \n" 27910bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2792c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels. 2793c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "subs %1, %1, #8 \n" // 8 processed per loop. 
2794c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmull.u8 q2, d0, d20 \n" // B to Sepia B 2795c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q2, d1, d21 \n" // G 2796c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q2, d2, d22 \n" // R 2797c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmull.u8 q3, d0, d24 \n" // B to Sepia G 2798c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q3, d1, d25 \n" // G 2799c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q3, d2, d26 \n" // R 2800c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmull.u8 q8, d0, d28 \n" // B to Sepia R 2801c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q8, d1, d29 \n" // G 2802c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q8, d2, d30 \n" // R 280387adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B 280487adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G 280587adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R 28060bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2807c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. 
2808c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "bgt 1b \n" 2809c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com : "+r"(dst_argb), // %0 2810c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "+r"(width) // %1 2811c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com : 2812c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 2813c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "q10", "q11", "q12", "q13", "q14", "q15" 2814c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com ); 2815c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com} 2816c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com 281762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com// Tranform 8 ARGB pixels (32 bytes) with color matrix. 2818c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// TODO(fbarchard): Was same as Sepia except matrix is provided. This function 2819c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// needs to saturate. Consider doing a non-saturating version. 2820c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.comvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, 2821c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com const int8* matrix_argb, int width) { 282262154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com asm volatile ( 28230bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(3) 2824c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. 282562154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com "vmovl.s8 q0, d4 \n" // B,G coefficients s16. 2826c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmovl.s8 q1, d5 \n" // R,A coefficients s16. 
282762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com 2828c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 282962154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com "1: \n" 28300bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(0) 2831c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. 2832c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 28330cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit 28340cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q9, d18 \n" // g 28350cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q10, d20 \n" // r 28360cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q15, d22 \n" // a 28370cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B 28380cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G 28390cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R 2840c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A 28410cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B 28420cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G 28430cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R 2844c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A 28450cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q12, q12, q4 \n" // Accumulate B 
28460cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q13, q13, q5 \n" // Accumulate G 28470cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q14, q14, q6 \n" // Accumulate R 2848c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqadd.s16 q15, q15, q7 \n" // Accumulate A 28490cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B 28500cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G 28510cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R 2852c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A 28530cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q12, q12, q4 \n" // Accumulate B 28540cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q13, q13, q5 \n" // Accumulate G 28550cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q14, q14, q6 \n" // Accumulate R 2856c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqadd.s16 q15, q15, q7 \n" // Accumulate A 28570cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B 28580cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G 28590cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R 2860c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q7, q15, d3[3] \n" // A += A * Matrix A 28610cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q12, q12, q4 \n" // Accumulate B 28620cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q13, q13, q5 \n" // Accumulate G 28630cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q14, q14, q6 \n" // 
Accumulate R 2864c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqadd.s16 q15, q15, q7 \n" // Accumulate A 2865c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B 2866c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G 2867c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R 2868c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A 28690bb310ebc4b1333bd4560e587be05da995577009fbarchard@google.com MEMACCESS(1) 2870c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. 2871c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "bgt 1b \n" 2872c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com : "+r"(src_argb), // %0 2873c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "+r"(dst_argb), // %1 2874c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "+r"(width) // %2 2875c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com : "r"(matrix_argb) // %3 2876c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", 28770cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "q10", "q11", "q12", "q13", "q14", "q15" 287862154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com ); 287962154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com} 288062154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com 2881512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. 
#ifdef HAS_ARGBMULTIPLYROW_NEON
// Multiply 2 rows of ARGB pixels together, 8 pixels at a time.
// Each 8 bit channel of src_argb0 is multiplied by the matching channel of
// src_argb1 into a 16 bit product, which is then narrowed back to 8 bits with
// a rounding shift right by 8 and stored to dst_argb.
//   src_argb0, src_argb1: source rows; 8 pixels (32 bytes) read per pass.
//   dst_argb: destination row; 8 pixels (32 bytes) written per pass.
//   width: pixel count; reduced by 8 per pass, loop repeats while it is > 0.
// NOTE(review): the loop body runs before width is tested and always handles
// a full group of 8 pixels - presumably callers pass a positive multiple of
// 8; confirm against the callers.
void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
                          uint8* dst_argb, int width) {
  asm volatile (
    // 8 pixel loop.
    // Row 0 is de-interleaved into the even d registers and row 1 into the
    // odd ones, so d0/d1 hold B, d2/d3 hold G, d4/d5 hold R, d6/d7 hold A.
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld4.8 {d0, d2, d4, d6}, [%0]! \n"  // load 8 ARGB pixels.
    MEMACCESS(1)
    "vld4.8 {d1, d3, d5, d7}, [%1]! \n"  // load 8 more ARGB pixels.
    "subs %3, %3, #8 \n"  // 8 processed per loop; sets flags for bgt.
    "vmull.u8 q0, d0, d1 \n"  // multiply B (8 bit x 8 bit -> 16 bit)
    "vmull.u8 q1, d2, d3 \n"  // multiply G
    "vmull.u8 q2, d4, d5 \n"  // multiply R
    "vmull.u8 q3, d6, d7 \n"  // multiply A
    // Narrow with rounding; results are packed into d0..d3 as B, G, R, A.
    "vrshrn.u16 d0, q0, #8 \n"  // 16 bit to 8 bit B
    "vrshrn.u16 d1, q1, #8 \n"  // 16 bit to 8 bit G
    "vrshrn.u16 d2, q2, #8 \n"  // 16 bit to 8 bit R
    "vrshrn.u16 d3, q3, #8 \n"  // 16 bit to 8 bit A
    MEMACCESS(2)
    "vst4.8 {d0, d1, d2, d3}, [%2]! \n"  // store 8 ARGB pixels.
    "bgt 1b \n"  // loop while the remaining width is > 0.

    : "+r"(src_argb0),  // %0
      "+r"(src_argb1),  // %1
      "+r"(dst_argb),   // %2
      "+r"(width)       // %3
    :
    : "cc", "memory", "q0", "q1", "q2", "q3"
  );
}
#endif  // HAS_ARGBMULTIPLYROW_NEON

// Add 2 rows of ARGB pixels together, 8 pixels at a time.
// Every byte of src_argb0 is added to the matching byte of src_argb1 with an
// unsigned saturating add (vqadd.u8) and the result is stored to dst_argb.
//   src_argb0, src_argb1: source rows; 8 pixels (32 bytes) read per pass.
//   dst_argb: destination row; 8 pixels (32 bytes) written per pass.
//   width: pixel count; reduced by 8 per pass, loop repeats while it is > 0.
// NOTE(review): the loop body runs before width is tested - presumably
// callers pass a positive multiple of 8; confirm against the callers.
void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
                     uint8* dst_argb, int width) {
  asm volatile (
    // 8 pixel loop.
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld4.8 {d0, d1, d2, d3}, [%0]! \n"  // load 8 ARGB pixels.
    MEMACCESS(1)
    "vld4.8 {d4, d5, d6, d7}, [%1]! \n"  // load 8 more ARGB pixels.
    "subs %3, %3, #8 \n"  // 8 processed per loop; sets flags for bgt.
    "vqadd.u8 q0, q0, q2 \n"  // add B, G (q0 = d0:d1, q2 = d4:d5)
    "vqadd.u8 q1, q1, q3 \n"  // add R, A (q1 = d2:d3, q3 = d6:d7)
    MEMACCESS(2)
    "vst4.8 {d0, d1, d2, d3}, [%2]! \n"  // store 8 ARGB pixels.
    "bgt 1b \n"  // loop while the remaining width is > 0.

    : "+r"(src_argb0),  // %0
      "+r"(src_argb1),  // %1
      "+r"(dst_argb),   // %2
      "+r"(width)       // %3
    :
    : "cc", "memory", "q0", "q1", "q2", "q3"
  );
}

// Subtract 2 rows of ARGB pixels, 8 pixels at a time.
void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
                          uint8* dst_argb, int width) {
  // Computes src_argb0 - src_argb1 for every byte with an unsigned saturating
  // subtract (vqsub.u8) and stores the result to dst_argb.
  // width is reduced by 8 per pass and the loop repeats while it is > 0.
  // NOTE(review): the loop body runs before width is tested - presumably
  // callers pass a positive multiple of 8; confirm against the callers.
  asm volatile (
    // 8 pixel loop.
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld4.8 {d0, d1, d2, d3}, [%0]! \n"  // load 8 ARGB pixels.
    MEMACCESS(1)
    "vld4.8 {d4, d5, d6, d7}, [%1]! \n"  // load 8 more ARGB pixels.
    "subs %3, %3, #8 \n"  // 8 processed per loop; sets flags for bgt.
    "vqsub.u8 q0, q0, q2 \n"  // subtract B, G (row 0 minus row 1)
    "vqsub.u8 q1, q1, q3 \n"  // subtract R, A
    MEMACCESS(2)
    "vst4.8 {d0, d1, d2, d3}, [%2]! \n"  // store 8 ARGB pixels.
    "bgt 1b \n"  // loop while the remaining width is > 0.

    : "+r"(src_argb0),  // %0
      "+r"(src_argb1),  // %1
      "+r"(dst_argb),   // %2
      "+r"(width)       // %3
    :
    : "cc", "memory", "q0", "q1", "q2", "q3"
  );
}

// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
// A = 255
// R = Sobel
// G = Sobel
// B = Sobel
// The sum is an unsigned saturating add of one byte from each source row.
//   src_sobelx, src_sobely: source rows; 8 bytes read from each per pass.
//   dst_argb: destination row; 8 ARGB pixels (32 bytes) written per pass.
//   width: pixel count; reduced by 8 per pass, loop repeats while it is > 0.
void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width) {
  asm volatile (
    "vmov.u8 d3, #255 \n"  // alpha; set once, d3 is not modified in the loop.
    // 8 pixel loop.
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {d0}, [%0]! \n"  // load 8 sobelx.
    MEMACCESS(1)
    "vld1.8 {d1}, [%1]! \n"  // load 8 sobely.
    "subs %3, %3, #8 \n"  // 8 processed per loop; sets flags for bgt.
    "vqadd.u8 d0, d0, d1 \n"  // add (saturating); d0 becomes B.
    "vmov.u8 d1, d0 \n"  // copy the sum into G.
    "vmov.u8 d2, d0 \n"  // copy the sum into R.
    MEMACCESS(2)
    "vst4.8 {d0, d1, d2, d3}, [%2]! \n"  // store 8 ARGB pixels.
    "bgt 1b \n"  // loop while the remaining width is > 0.
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_argb),    // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "q0", "q1"
  );
}

// Adds Sobel X and Sobel Y and stores Sobel into plane.
// The sum is an unsigned saturating add; one output byte per input byte pair.
//   src_sobelx, src_sobely: source rows; 16 bytes read from each per pass.
//   dst_y: destination plane row; 16 bytes written per pass.
//   width: pixel count; reduced by 16 per pass, loop repeats while it is > 0.
void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                          uint8* dst_y, int width) {
  asm volatile (
    // 16 pixel loop.
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {q0}, [%0]! \n"  // load 16 sobelx.
    MEMACCESS(1)
    "vld1.8 {q1}, [%1]! \n"  // load 16 sobely.
    "subs %3, %3, #16 \n"  // 16 processed per loop; sets flags for bgt.
    "vqadd.u8 q0, q0, q1 \n"  // add (saturating)
    MEMACCESS(2)
    "vst1.8 {q0}, [%2]! \n"  // store 16 pixels.
    "bgt 1b \n"  // loop while the remaining width is > 0.
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_y),       // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "q0", "q1"
  );
}

// Mixes Sobel X, Sobel Y and Sobel into ARGB.
// A = 255
// R = Sobel X
// G = Sobel
// B = Sobel Y
// Sobel (G) is the unsigned saturating sum of the Sobel X and Sobel Y bytes.
//   src_sobelx, src_sobely: source rows; 8 bytes read from each per pass.
//   dst_argb: destination row; 8 ARGB pixels (32 bytes) written per pass.
//   width: pixel count; reduced by 8 per pass, loop repeats while it is > 0.
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width) {
  asm volatile (
    "vmov.u8 d3, #255 \n"  // alpha; set once, d3 is not modified in the loop.
    // 8 pixel loop.
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {d2}, [%0]! \n"  // load 8 sobelx. (stored as R)
    MEMACCESS(1)
    "vld1.8 {d0}, [%1]! \n"  // load 8 sobely. (stored as B)
    "subs %3, %3, #8 \n"  // 8 processed per loop; sets flags for bgt.
    "vqadd.u8 d1, d0, d2 \n"  // add (saturating); d1 is stored as G.
    MEMACCESS(2)
    "vst4.8 {d0, d1, d2, d3}, [%2]! \n"  // store 8 ARGB pixels.
    "bgt 1b \n"  // loop while the remaining width is > 0.
  : "+r"(src_sobelx),  // %0
    "+r"(src_sobely),  // %1
    "+r"(dst_argb),    // %2
    "+r"(width)        // %3
  :
  : "cc", "memory", "q0", "q1"
  );
}

// SobelX as a matrix is
// -1 0 1
// -2 0 2
// -1 0 1
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobelx, int width) {
  // For each of 8 output bytes this sums, over the three source rows, the
  // difference between the byte at offset 0 and the byte at offset 2, with
  // the src_y1 difference counted twice. The absolute value of the 16 bit sum
  // is narrowed to 8 bits with unsigned saturation and stored to dst_sobelx.
  // Each row pointer is post-incremented by %5 (2) and then by %6 (6), so it
  // advances 8 bytes per pass. width is reduced by 8 per pass and the loop
  // repeats while it is > 0.
  // The differences are taken as (offset 0) - (offset 2), the opposite sign
  // to the matrix above; vabs makes the sign irrelevant.
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {d0}, [%0],%5 \n"  // top
    MEMACCESS(0)
    "vld1.8 {d1}, [%0],%6 \n"  // top, 2 bytes further on.
    "vsubl.u8 q0, d0, d1 \n"  // widen to 16 bit difference.
    MEMACCESS(1)
    "vld1.8 {d2}, [%1],%5 \n"  // center * 2
    MEMACCESS(1)
    "vld1.8 {d3}, [%1],%6 \n"
    "vsubl.u8 q1, d2, d3 \n"
    "vadd.s16 q0, q0, q1 \n"  // center difference is added twice.
    "vadd.s16 q0, q0, q1 \n"
    MEMACCESS(2)
    "vld1.8 {d2}, [%2],%5 \n"  // bottom
    MEMACCESS(2)
    "vld1.8 {d3}, [%2],%6 \n"
    "subs %4, %4, #8 \n"  // 8 pixels
    "vsubl.u8 q1, d2, d3 \n"
    "vadd.s16 q0, q0, q1 \n"
    "vabs.s16 q0, q0 \n"  // absolute value of the 16 bit sum.
    "vqmovn.u16 d0, q0 \n"  // saturate to 8 bit.
    MEMACCESS(3)
    "vst1.8 {d0}, [%3]! \n"  // store 8 sobelx
    "bgt 1b \n"  // loop while the remaining width is > 0.
  : "+r"(src_y0),      // %0
    "+r"(src_y1),      // %1
    "+r"(src_y2),      // %2
    "+r"(dst_sobelx),  // %3
    "+r"(width)        // %4
  : "r"(2),            // %5
    "r"(6)             // %6
  : "cc", "memory", "q0", "q1"  // Clobber List
  );
}

// SobelY as a matrix is
// -1 -2 -1
//  0  0  0
//  1  2  1
// For each of 8 output bytes this sums the differences src_y0 - src_y1 at
// byte offsets 0, 1 and 2, with the offset 1 difference counted twice. The
// absolute value of the 16 bit sum is narrowed to 8 bits with unsigned
// saturation and stored to dst_sobely.
//   src_y0, src_y1: source rows; advanced 8 bytes per pass (1 + 1 + 6).
//   dst_sobely: destination row; 8 bytes written per pass.
//   width: pixel count; reduced by 8 per pass, loop repeats while it is > 0.
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
                    uint8* dst_sobely, int width) {
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    MEMACCESS(0)
    "vld1.8 {d0}, [%0],%4 \n"  // left
    MEMACCESS(1)
    "vld1.8 {d1}, [%1],%4 \n"
    "vsubl.u8 q0, d0, d1 \n"  // widen to 16 bit difference.
    MEMACCESS(0)
    "vld1.8 {d2}, [%0],%4 \n"  // center * 2
    MEMACCESS(1)
    "vld1.8 {d3}, [%1],%4 \n"
    "vsubl.u8 q1, d2, d3 \n"
    "vadd.s16 q0, q0, q1 \n"  // center difference is added twice.
    "vadd.s16 q0, q0, q1 \n"
    MEMACCESS(0)
    "vld1.8 {d2}, [%0],%5 \n"  // right
    MEMACCESS(1)
    "vld1.8 {d3}, [%1],%5 \n"
    "subs %3, %3, #8 \n"  // 8 pixels
    "vsubl.u8 q1, d2, d3 \n"
    "vadd.s16 q0, q0, q1 \n"
    "vabs.s16 q0, q0 \n"  // absolute value of the 16 bit sum.
    "vqmovn.u16 d0, q0 \n"  // saturate to 8 bit.
    MEMACCESS(2)
    "vst1.8 {d0}, [%2]! \n"  // store 8 sobely
    "bgt 1b \n"  // loop while the remaining width is > 0.
  : "+r"(src_y0),      // %0
    "+r"(src_y1),      // %1
    "+r"(dst_sobely),  // %2
    "+r"(width)        // %3
  : "r"(1),            // %4
    "r"(6)             // %5
  : "cc", "memory", "q0", "q1"  // Clobber List
  );
}
#endif  // __ARM_NEON__

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif