193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com/* 2b0c97975894a5eebebf9d93147cdd941a3accb63fbarchard@google.com * Copyright 2011 The LibYuv Project Authors. All rights reserved. 393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * 493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * Use of this source code is governed by a BSD-style license 593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * that can be found in the LICENSE file in the root of the source 693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * tree. An additional intellectual property rights grant can be found 7cde587092fef0dbed2c35602f30b79e7b892e766fbarchard@google.com * in the file PATENTS. All contributing project authors may 893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com * be found in the AUTHORS file in the root of the source tree. 993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com */ 1093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 11142f6c4ed5eaeec0176f255e64bac8d8c70b42e1fbarchard@google.com#include "libyuv/row.h" 1293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 13fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus 14fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comnamespace libyuv { 15fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.comextern "C" { 16fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif 17fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com 182d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com// This module is for GCC Neon 1937ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ 2037ad8b650717568e34a5ac807b63cc9f072c96b6fbarchard@google.com !defined(__native_client__) 212d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 224807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y, 4 U and 4 V from 422 
234807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READYUV422 \ 242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 252c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.32 {d2[0]}, [%1]! \n" \ 262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.32 {d2[1]}, [%2]! \n" 274807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 28b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com// Read 8 Y, 2 U and 2 V from 422 29b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com#define READYUV411 \ 302c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 312c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.16 {d2[0]}, [%1]! \n" \ 322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.16 {d2[1]}, [%2]! \n" \ 33b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d3, d2 \n" \ 34b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vzip.u8 d2, d3 \n" 35b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 36b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com// Read 8 Y, 8 U and 8 V from 444 37b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com#define READYUV444 \ 382c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 392c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1]! \n" \ 402c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d3}, [%2]! 
\n" \ 41b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vpaddl.u8 q1, q1 \n" \ 42b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vrshrn.u16 d2, q1, #1 \n" 43b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 4400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com// Read 8 Y, and set 4 U and 4 V to 128 4500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com#define READYUV400 \ 462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 4700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d2, #128 \n" 4800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com 494807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y and 4 UV from NV12 504807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READNV12 \ 512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 522c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1]! \n" \ 534807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 544807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vuzp.u8 d2, d3 \n" \ 55793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// Read 8 Y and 4 VU from NV21 584807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define READNV21 \ 592c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0]! \n" \ 602c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1]! 
\n" \ 614807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ 624807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vuzp.u8 d3, d2 \n" \ 63793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 64793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 65793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com// Read 8 YUY2 66793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com#define READYUY2 \ 672c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {d0, d2}, [%0]! \n" \ 68793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d3, d2 \n" \ 69793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vuzp.u8 d2, d3 \n" \ 70793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 71793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 72793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com// Read 8 UYVY 73793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com#define READUYVY \ 742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {d2, d3}, [%0]! 
\n" \ 75793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d0, d3 \n" \ 76793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d3, d2 \n" \ 77793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vuzp.u8 d2, d3 \n" \ 78793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vtrn.u32 d2, d3 \n" 794807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com#define YUV422TORGB \ 8193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\ 8293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\ 8393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmull.s8 q9, d2, d25 \n"/* u/v G component */\ 8493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d1, #0 \n"/* split odd/even y apart */\ 8593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d0, d1 \n" \ 8693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vsub.s16 q0, q0, q15 \n"/* offset y */\ 8793d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmul.s16 q0, q0, q14 \n" \ 8893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vadd.s16 d18, d19 \n" \ 89c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vqadd.s16 d20, d0, d16 \n" /* B */ \ 9093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vqadd.s16 d21, d1, d16 \n" \ 91c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vqadd.s16 d22, d0, d17 \n" /* R */ \ 9293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vqadd.s16 d23, d1, d17 \n" \ 93c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vqadd.s16 d16, d0, d18 \n" /* G */ \ 9493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vqadd.s16 d17, d1, d18 \n" \ 95d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com "vqshrun.s16 d0, q10, #6 \n" /* B */ \ 
96d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com "vqshrun.s16 d1, q11, #6 \n" /* G */ \ 97d39ce16ba0654179bb51c4ecd46d8943cc24d130fbarchard@google.com "vqshrun.s16 d2, q8, #6 \n" /* R */ \ 9893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\ 9993d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmovl.u8 q11, d1 \n" \ 10093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmovl.u8 q8, d2 \n" \ 10193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d20, d21 \n" \ 10293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d22, d23 \n" \ 10393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vtrn.u8 d16, d17 \n" \ 1044807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d21, d16 \n" 10593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 106f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102, 107f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com 0, 0, 0, 0, 0, 0, 0, 0 }; 108f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.comstatic vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52, 109f2aa91a1ac08703d5a22af7fa48c59eba8eb397afbarchard@google.com 0, 0, 0, 0, 0, 0, 0, 0 }; 11093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 111b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I444ToARGBRow_NEON(const uint8* src_y, 112b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_u, 113b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_v, 114b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com uint8* dst_argb, 115b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com int width) { 116b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com asm volatile ( 1172c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 
1182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 119b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d26, #128 \n" 120b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q14, #74 \n" 121b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q15, #16 \n" 122c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 123b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "1: \n" 124b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com READYUV444 125b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com YUV422TORGB 126b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "subs %4, %4, #8 \n" 127b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d23, #255 \n" 128b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 129b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "bgt 1b \n" 130b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "+r"(src_y), // %0 131b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_u), // %1 132b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_v), // %2 133b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(dst_argb), // %3 134b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(width) // %4 135b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "r"(&kUVToRB), // %5 136b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "r"(&kUVToG) // %6 137b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 138b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 139b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com ); 140b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com} 
141b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 1429de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToARGBRow_NEON(const uint8* src_y, 1439de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 1449de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 1459de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_argb, 146e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 1475b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 1482c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 1492c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 15093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d26, #128 \n" 15193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q14, #74 \n" 15293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q15, #16 \n" 153c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 15493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "1: \n" 1554807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 156e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com YUV422TORGB 1574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 15893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d23, #255 \n" 159dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" 16018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 1619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 1629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 1639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 1649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_argb), // %3 1659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 1669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 1679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 16864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 16964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 17093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com ); 17193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com} 17293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 173b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.comvoid I411ToARGBRow_NEON(const uint8* src_y, 174b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_u, 175b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com const uint8* src_v, 176b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com uint8* dst_argb, 177b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com int width) { 178b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com asm volatile ( 1792c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 1802c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 181b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d26, #128 \n" 182b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q14, #74 \n" 
183b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u16 q15, #16 \n" 184c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 185b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "1: \n" 186b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com READYUV411 187b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com YUV422TORGB 188b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "subs %4, %4, #8 \n" 189b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vmov.u8 d23, #255 \n" 190b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 191b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "bgt 1b \n" 192b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "+r"(src_y), // %0 193b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_u), // %1 194b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(src_v), // %2 195b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(dst_argb), // %3 196b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "+r"(width) // %4 197b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "r"(&kUVToRB), // %5 198b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "r"(&kUVToG) // %6 199b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 200b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 201b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com ); 202b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com} 203b883ce6e2c15627ab9fa95e1bb6eca0dc399d364fbarchard@google.com 2049de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToBGRARow_NEON(const uint8* src_y, 2059de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 
2069de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 2079de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_bgra, 208e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2095b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 2102c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 2112c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 21293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d26, #128 \n" 21393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q14, #74 \n" 21493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q15, #16 \n" 215c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 21693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "1: \n" 2174807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 218e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com YUV422TORGB 2194807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 22093d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vswp.u8 d20, d22 \n" 22193d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d19, #255 \n" 222dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d19, d20, d21, d22}, [%3]! 
\n" 22318184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 2249de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 2259de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 2269de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 2279de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_bgra), // %3 2289de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 2299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 2309de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 23164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 23264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 23393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com ); 23493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com} 23593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com 2369de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToABGRRow_NEON(const uint8* src_y, 2379de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 2389de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 2399de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_abgr, 240e5f3fd4cc870b9b22112b3b2f25af06e067c8b7dfbarchard@google.com int width) { 2415b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 2422c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 2432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 24493d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d26, #128 \n" 24593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q14, #74 \n" 
24693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u16 q15, #16 \n" 247c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 24893d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "1: \n" 2494807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 250e214fe3f070d47d34e3cfbf4431994f97c9e0d1bfbarchard@google.com YUV422TORGB 2514807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 25293d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vswp.u8 d20, d22 \n" 25393d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com "vmov.u8 d23, #255 \n" 254dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 25518184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 2569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 2579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 2589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 2599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_abgr), // %3 2609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 2619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 2629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 26364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 26464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 26593d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com ); 26693d003f87426197b3e4891348960decf07c8742cfrkoenig@google.com} 267fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com 2689de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGBARow_NEON(const uint8* src_y, 
2699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 2709de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 2719de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_rgba, 2722d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com int width) { 2732d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com asm volatile ( 2742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 2752c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 2762d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u8 d26, #128 \n" 2772d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u16 q14, #74 \n" 2782d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u16 q15, #16 \n" 279c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2802d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "1: \n" 2814807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READYUV422 2822d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com YUV422TORGB 2834807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #8 \n" 2842d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "vmov.u8 d19, #255 \n" 285dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vst4.8 {d19, d20, d21, d22}, [%3]! 
\n" 2862d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com "bgt 1b \n" 2879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 2889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 2899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 2909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_rgba), // %3 2919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 2929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 2939de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 29464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 29564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 2962d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com ); 2972d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com} 2982d61b7495dadc22eb1fa6aae29271f0f01bfdc02fbarchard@google.com 2999de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRGB24Row_NEON(const uint8* src_y, 300834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 301834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 302834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_rgb24, 303834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 30464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 3052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 3062c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 30764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u8 d26, #128 \n" 30864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q14, #74 \n" 
30964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q15, #16 \n" 310c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 31164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "1: \n" 31264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com READYUV422 31364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com YUV422TORGB 31464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "subs %4, %4, #8 \n" 31564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vst3.8 {d20, d21, d22}, [%3]! \n" 31664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 3179de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 3189de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 3199de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 3209de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_rgb24), // %3 3219de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %4 3229de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %5 3239de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %6 32464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 32564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 32664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 32764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 32864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 3299de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToRAWRow_NEON(const uint8* src_y, 3309de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 3319de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 3329de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 
uint8* dst_raw, 33364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com int width) { 33464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com asm volatile ( 3352c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 3362c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 33764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u8 d26, #128 \n" 33864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q14, #74 \n" 33964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vmov.u16 q15, #16 \n" 340c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 34164ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "1: \n" 34264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com READYUV422 34364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com YUV422TORGB 34464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "subs %4, %4, #8 \n" 34564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vswp.u8 d20, d22 \n" 34664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "vst3.8 {d20, d21, d22}, [%3]! 
\n" 34764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "bgt 1b \n" 3489de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 3499de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_u), // %1 3509de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_v), // %2 3519de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_raw), // %3 35264ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "+r"(width) // %4 35364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "r"(&kUVToRB), // %5 35464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "r"(&kUVToG) // %6 35564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 35664ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 35764ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com ); 35864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com} 35964ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com 36011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTORGB565 \ 36111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d20, d20, #3 \n" /* B */ \ 36211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d21, d21, #2 \n" /* G */ \ 36311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d22, d22, #3 \n" /* R */ \ 36411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q8, d20 \n" /* B */ \ 36511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q9, d21 \n" /* G */ \ 36611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q10, d22 \n" /* R */ \ 36711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q9, q9, #5 \n" /* G */ \ 36811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q10, q10, #11 \n" /* R */ \ 
36911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q8, q9 \n" /* BG */ \ 37011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q0, q10 \n" /* BGR */ 37111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 37215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.comvoid I422ToRGB565Row_NEON(const uint8* src_y, 373834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 374834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 375834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_rgb565, 376834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 37715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com asm volatile ( 3782c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 3792c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 38015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vmov.u8 d26, #128 \n" 38115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vmov.u16 q14, #74 \n" 38215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vmov.u16 q15, #16 \n" 383c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 38415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "1: \n" 38515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com READYUV422 38615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com YUV422TORGB 38715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "subs %4, %4, #8 \n" 38811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTORGB565 38915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. 
39015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "bgt 1b \n" 39115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com : "+r"(src_y), // %0 39215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(src_u), // %1 39315449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(src_v), // %2 39415449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(dst_rgb565), // %3 39515449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "+r"(width) // %4 39615449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com : "r"(&kUVToRB), // %5 39715449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "r"(&kUVToG) // %6 39815449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 39915449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 40015449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com ); 40115449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com} 40215449263c4bba75bc396dc3d60266efee6ab6c66fbarchard@google.com 40311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTOARGB1555 \ 40411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 q10, q10, #3 \n" /* B */ \ 40511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d22, d22, #3 \n" /* R */ \ 40611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d23, d23, #7 \n" /* A */ \ 40711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q8, d20 \n" /* B */ \ 40811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q9, d21 \n" /* G */ \ 40911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q10, d22 \n" /* R */ \ 41011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmovl.u8 q11, d23 \n" /* A */ \ 41111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q9, q9, #5 \n" /* G */ \ 
41211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q10, q10, #10 \n" /* R */ \ 41311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshl.u16 q11, q11, #15 \n" /* A */ \ 41411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q8, q9 \n" /* BG */ \ 41511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q1, q10, q11 \n" /* RA */ \ 41611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr q0, q0, q1 \n" /* BGRA */ 41711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 41811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB1555Row_NEON(const uint8* src_y, 419834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 420834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 421834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_argb1555, 422834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 42311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com asm volatile ( 4242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 4252c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 42611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d26, #128 \n" 42711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q14, #74 \n" 42811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q15, #16 \n" 429c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 43011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "1: \n" 43111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com READYUV422 43211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com YUV422TORGB 43311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "subs %4, %4, #8 \n" 43411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d23, #255 \n" 
43511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB1555 43611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555. 43711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "bgt 1b \n" 43811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "+r"(src_y), // %0 43911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_u), // %1 44011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_v), // %2 44111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(dst_argb1555), // %3 44211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(width) // %4 44311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "r"(&kUVToRB), // %5 44411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "r"(&kUVToG) // %6 44511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 44611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 44711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ); 44811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com} 44911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 45011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com#define ARGBTOARGB4444 \ 45111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d20, d20, #4 \n" /* B */ \ 45211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vbic.32 d21, d21, d4 \n" /* G */ \ 45311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vshr.u8 d22, d22, #4 \n" /* R */ \ 45411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vbic.32 d23, d23, d4 \n" /* A */ \ 45511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr d0, d20, d21 \n" /* BG */ \ 45611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vorr d1, d22, d23 \n" /* RA */ \ 
45711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vzip.u8 d0, d1 \n" /* BGRA */ 45811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 45911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.comvoid I422ToARGB4444Row_NEON(const uint8* src_y, 460834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_u, 461834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com const uint8* src_v, 462834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com uint8* dst_argb4444, 463834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com int width) { 46411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com asm volatile ( 4652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%5] \n" 4662c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%6] \n" 46711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d26, #128 \n" 46811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q14, #74 \n" 46911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u16 q15, #16 \n" 47011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. 471c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 47211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "1: \n" 47311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com READYUV422 47411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com YUV422TORGB 47511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "subs %4, %4, #8 \n" 47611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vmov.u8 d23, #255 \n" 47711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB4444 47811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444. 
47911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "bgt 1b \n" 48011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "+r"(src_y), // %0 48111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_u), // %1 48211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(src_v), // %2 48311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(dst_argb4444), // %3 48411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "+r"(width) // %4 48511c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "r"(&kUVToRB), // %5 48611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "r"(&kUVToG) // %6 48711c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 48811c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 48911c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ); 49011c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com} 49111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com 49200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid YToARGBRow_NEON(const uint8* src_y, 49300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com uint8* dst_argb, 49400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com int width) { 49500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com asm volatile ( 4962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%3] \n" 4972c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%4] \n" 49800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d26, #128 \n" 49900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u16 q14, #74 \n" 50000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u16 q15, #16 \n" 501c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 
50200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "1: \n" 50300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com READYUV400 50400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com YUV422TORGB 50500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "subs %2, %2, #8 \n" 50600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d23, #255 \n" 50700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 50800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "bgt 1b \n" 50900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "+r"(src_y), // %0 51000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(dst_argb), // %1 51100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(width) // %2 51200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "r"(&kUVToRB), // %3 51300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "r"(&kUVToG) // %4 51400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 51500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 51600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com ); 51700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com} 51800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com 51900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.comvoid I400ToARGBRow_NEON(const uint8* src_y, 52000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com uint8* dst_argb, 52100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com int width) { 52200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com asm volatile ( 523c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 52400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov.u8 d23, #255 \n" 52500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "1: \n" 
5262c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d20}, [%0]! \n" 52700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov d21, d20 \n" 52800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vmov d22, d20 \n" 52900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "subs %2, %2, #8 \n" 53000b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 53100b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "bgt 1b \n" 53200b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "+r"(src_y), // %0 53300b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(dst_argb), // %1 53400b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com "+r"(width) // %2 53500b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : 53600b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com : "cc", "memory", "d20", "d21", "d22", "d23" 53700b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com ); 53800b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com} 53900b69a2fe66183be5f72cb80c59f22e137b45359fbarchard@google.com 5409de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV12ToARGBRow_NEON(const uint8* src_y, 5419de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_uv, 5429de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_argb, 5434807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com int width) { 5444807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com asm volatile ( 5452c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 5462c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 5474807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d26, #128 \n" 5484807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q14, #74 \n" 5494807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 
q15, #16 \n" 550c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 5514807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "1: \n" 5524807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READNV12 5534807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com YUV422TORGB 5544807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #8 \n" 5554807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d23, #255 \n" 5564807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%2]! \n" 5574807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "bgt 1b \n" 5589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 5599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_uv), // %1 5609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_argb), // %2 5619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %3 5629de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %4 5639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %5 56464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 56564ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 5664807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com ); 5674807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 5684807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 5699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid NV21ToARGBRow_NEON(const uint8* src_y, 5709de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_uv, 5719de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_argb, 5724807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com int width) { 
5734807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com asm volatile ( 5742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 5752c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 5764807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d26, #128 \n" 5774807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q14, #74 \n" 5784807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u16 q15, #16 \n" 579c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 5804807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "1: \n" 5814807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com READNV21 5824807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com YUV422TORGB 5834807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #8 \n" 5844807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vmov.u8 d23, #255 \n" 5854807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" 5864807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "bgt 1b \n" 5879de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "+r"(src_y), // %0 5889de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(src_uv), // %1 5899de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(dst_argb), // %2 5909de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "+r"(width) // %3 5919de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com : "r"(&kUVToRB), // %4 5929de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "r"(&kUVToG) // %5 59364ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 59464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 5954807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com ); 5964807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 5974807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 5989f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV12ToRGB565Row_NEON(const uint8* src_y, 599bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com const uint8* src_uv, 600bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgb565, 601bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com int width) { 6029f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com asm volatile ( 6032c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%4] \n" 6042c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 6059f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u8 d26, #128 \n" 6069f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q14, #74 \n" 6079f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q15, #16 \n" 608c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 
6099f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "1: \n" 6109f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com READNV12 6119f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com YUV422TORGB 6129f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "subs %3, %3, #8 \n" 6139f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ARGBTORGB565 6149f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 6159f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "bgt 1b \n" 6169f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "+r"(src_y), // %0 6179f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(src_uv), // %1 6189f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(dst_rgb565), // %2 6199f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(width) // %3 6209f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "r"(&kUVToRB), // %4 6219f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "r"(&kUVToG) // %5 6229f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 6239f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 6249f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ); 6259f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com} 6269f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com 6279f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.comvoid NV21ToRGB565Row_NEON(const uint8* src_y, 628bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com const uint8* src_uv, 629bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com uint8* dst_rgb565, 630bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com int width) { 6319f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com asm volatile ( 6322c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com 
"vld1.8 {d24}, [%4] \n" 6332c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%5] \n" 6349f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u8 d26, #128 \n" 6359f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q14, #74 \n" 6369f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vmov.u16 q15, #16 \n" 637c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 6389f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "1: \n" 6399f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com READNV21 6409f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com YUV422TORGB 6419f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "subs %3, %3, #8 \n" 6429f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ARGBTORGB565 6439f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 6449f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "bgt 1b \n" 6459f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "+r"(src_y), // %0 6469f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(src_uv), // %1 6479f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(dst_rgb565), // %2 6489f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "+r"(width) // %3 6499f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "r"(&kUVToRB), // %4 6509f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "r"(&kUVToG) // %5 6519f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 6529f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 6539f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com ); 6549f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com} 6559f2d404145e8a69d209eb130975442ab3f29d5a7fbarchard@google.com 
656793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid YUY2ToARGBRow_NEON(const uint8* src_yuy2, 657793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com uint8* dst_argb, 658793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com int width) { 659793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com asm volatile ( 6602c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%3] \n" 6612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%4] \n" 662793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d26, #128 \n" 663793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q14, #74 \n" 664793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q15, #16 \n" 665c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 666793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "1: \n" 667793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com READYUY2 668793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com YUV422TORGB 669793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "subs %2, %2, #8 \n" 670793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d23, #255 \n" 671793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" 672793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "bgt 1b \n" 673793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "+r"(src_yuy2), // %0 674793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(dst_argb), // %1 675793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(width) // %2 676793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "r"(&kUVToRB), // %3 677793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "r"(&kUVToG) // %4 678793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 679793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 680793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com ); 681793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com} 682793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 683793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.comvoid UYVYToARGBRow_NEON(const uint8* src_uyvy, 684793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com uint8* dst_argb, 685793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com int width) { 686793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com asm volatile ( 6872c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d24}, [%3] \n" 6882c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d25}, [%4] \n" 689793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d26, #128 \n" 690793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q14, #74 \n" 691793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u16 q15, #16 \n" 692c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 693793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "1: \n" 694793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com READUYVY 695793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 
YUV422TORGB 696793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "subs %2, %2, #8 \n" 697793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vmov.u8 d23, #255 \n" 698793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 699793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "bgt 1b \n" 700793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "+r"(src_uyvy), // %0 701793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(dst_argb), // %1 702793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "+r"(width) // %2 703793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "r"(&kUVToRB), // %3 704793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "r"(&kUVToG) // %4 705793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 706793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 707793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com ); 708793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com} 709793e5a06ffe55a911f8aa3f4731ae681039952bcfbarchard@google.com 7104a86a836fcde981b6c3fd3f4a216a3253a2d26bcfbarchard@google.com// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. 711f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 712f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int width) { 7135b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 714c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 7152d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com "1: \n" 7162c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! 
\n" // load 16 pairs of UV 7174807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 7182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store U 7192c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%2]! \n" // store V 72018184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 7218536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com : "+r"(src_uv), // %0 7228536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com "+r"(dst_u), // %1 7238536b2f389dea8f8b7177f4886d995e3315f12e8fbarchard@google.com "+r"(dst_v), // %2 72416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com "+r"(width) // %3 // Output registers 7252d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com : // Input registers 7268f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 7272d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com ); 7282d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com} 7292d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 73062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com// Reads 16 U's and V's and writes out 16 pairs of UV. 731f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 732f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com int width) { 73362a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com asm volatile ( 734c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 73562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "1: \n" 7362c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load U 7372c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! 
\n" // load V 73862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 739d26b4514726a9a7476f6dfb6730cda2b422bf550fbarchard@google.com "vst2.u8 {q0, q1}, [%2]! \n" // store 16 pairs of UV 74062a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "bgt 1b \n" 74162a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com : 74262a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(src_u), // %0 74362a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(src_v), // %1 74462a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(dst_uv), // %2 74562a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "+r"(width) // %3 // Output registers 74662a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com : // Input registers 7478f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 74862a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com ); 74962a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com} 750834b748d109d79dedc27b5022d2f959a6401591dfbarchard@google.com 7512c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. 75219932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.comvoid CopyRow_NEON(const uint8* src, uint8* dst, int count) { 7535b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 754c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 75519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com "1: \n" 7562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32 75762a961bee72e48e4fa14365bd7444c9280540b6ffbarchard@google.com "subs %2, %2, #32 \n" // 32 processed per loop 7582c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0, d1, d2, d3}, [%1]! 
\n" // store 32 75918184fd19dba08d6567357e3913285a779e4b9f3fbarchard@google.com "bgt 1b \n" 7603e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src), // %0 7613e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst), // %1 7623e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(count) // %2 // Output registers 7633e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : // Input registers 7648f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 76519932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com ); 76619932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com} 76719932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com 76864ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow8 writes 'count' bytes using a 32 bit value repeated. 769f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid SetRow_NEON(uint8* dst, uint32 v32, int count) { 7703e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com asm volatile ( 7714807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vdup.u32 q0, %2 \n" // duplicate 4 ints 7724807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "1: \n" 7734807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %1, %1, #16 \n" // 16 bytes per loop 7742c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" // store 7754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "bgt 1b \n" 7763e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(dst), // %0 7773e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(count) // %1 7783e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "r"(v32) // %2 7798f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0" 7803e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 7814807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 7824807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 7834807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com// TODO(fbarchard): Make fully assembler 78464ce0ab544591b1e26ae6d276932cacdb8137071fbarchard@google.com// SetRow32 writes 'count' words using a 32 bit value repeated. 785f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.comvoid ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, 7861b40d8caa2811759aa5fa87f2e23061d26f8968cfbarchard@google.com int dst_stride, int height) { 7874807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com for (int y = 0; y < height; ++y) { 788f08ac6bb095348565b5259f2fab95f259ef47edefbarchard@google.com SetRow_NEON(dst, v32, width << 2); 7894807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com dst += dst_stride; 7904807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com } 7914807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com} 7924807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 79316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.comvoid MirrorRow_NEON(const uint8* src, uint8* dst, int width) { 7945b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 7953e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com // Start at end of source row. 
7963e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "mov r3, #-16 \n" 7973e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "add %0, %0, %2 \n" 7983e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "sub %0, #16 \n" 7993e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 800c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 80182069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "1: \n" 8023e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vld1.8 {q0}, [%0], r3 \n" // src -= 16 8033e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "subs %2, #16 \n" // 16 pixels per loop. 8043e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vrev64.8 q0, q0 \n" 8053e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // dst += 16 8063e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d0}, [%1]! \n" 8073e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "bgt 1b \n" 8083e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src), // %0 8093e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst), // %1 8103e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %2 8113e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 8128f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "r3", "q0" 81316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com ); 81416a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com} 81516a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 816752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.comvoid MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 817752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com int width) { 8185b22506b14367f38c1e31d1aab4dc1132158737ffbarchard@google.com asm volatile ( 8193e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com // Start at end of 
source row. 820752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com "mov r12, #-16 \n" 8213e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "add %0, %0, %3, lsl #1 \n" 8223e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "sub %0, #16 \n" 8233e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 824c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 82582069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "1: \n" 826752cb9e057c8c36a251810e57f98f195196fedc6fbarchard@google.com "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16 8273e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "subs %3, #8 \n" // 8 pixels per loop. 8283e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vrev64.8 q0, q0 \n" 8293e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // dst += 8 8303e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d1}, [%2]! \n" 8313e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "bgt 1b \n" 8323e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_uv), // %0 8333e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_u), // %1 8343e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_v), // %2 8353e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %3 8363e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 8378f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "r12", "q0" 83816a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com ); 83916a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com} 8403e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 8413e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.comvoid ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { 8423e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com asm volatile ( 
8433e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com // Start at end of source row. 8443e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "mov r3, #-16 \n" 8453e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "add %0, %0, %2, lsl #2 \n" 8463e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "sub %0, #16 \n" 8473e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com 848c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 8493e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "1: \n" 8503e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vld1.8 {q0}, [%0], r3 \n" // src -= 16 8513e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "subs %2, #4 \n" // 4 pixels per loop. 8523e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vrev64.32 q0, q0 \n" 8533e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // dst += 16 8543e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "vst1.8 {d0}, [%1]! 
\n" 8553e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "bgt 1b \n" 8563e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src), // %0 8573e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst), // %1 8583e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %2 8593e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 8608f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "r3", "q0" 8613e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 8623e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com} 86316a96645b4987fddbcf726dea2fcf5dc87ca10e1fbarchard@google.com 864797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) { 865797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com asm volatile ( 866275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vmov.u8 d4, #255 \n" // Alpha 867c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 868797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "1: \n" 869275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. 87082069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 8714807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 
872797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "bgt 1b \n" 873797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com : "+r"(src_rgb24), // %0 874dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_argb), // %1 875dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %2 876797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com : 8778f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 878797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com ); 879797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com} 880797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com 881797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.comvoid RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { 882797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com asm volatile ( 883275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vmov.u8 d4, #255 \n" // Alpha 884c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 885797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "1: \n" 886275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. 8874807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 888275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vswp.u8 d1, d3 \n" // swap R, B 889275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. 
890797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "bgt 1b \n" 891dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_raw), // %0 892797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "+r"(dst_argb), // %1 893797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com "+r"(pix) // %2 894797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com : 8958f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 896797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com ); 897797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com} 898797e9ef871912dc927b80edeffdee0c614142a51fbarchard@google.com 899bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com#define RGB565TOARGB \ 900f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ 901f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ 902f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ 903f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ 904f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ 905f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ 906f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vorr.u8 d0, d0, d4 \n" /* B */ \ 907f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ 908f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vorr.u8 d2, d1, d5 \n" /* R */ \ 909f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vorr.u8 d1, d4, d6 \n" /* G */ 910bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 
911bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { 912bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 913bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d3, #255 \n" // Alpha 914c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 915bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 9164b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 917bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 918bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com RGB565TOARGB 919bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 920bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 921bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgb565), // %0 922bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_argb), // %1 923bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 924bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 9258f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 926bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 927bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 928bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 9294b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB1555TOARGB \ 9304b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ 9314b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ 9324b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshrn.u16 d5, q0, #5 
\n" /* G xxxGGGGG */ \ 9334b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ 9344b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ 9354b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ 9364b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ 9374b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ 9384b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \ 9394b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ 9404b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q1, q1, q3 \n" /* R,A */ \ 9414b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q0, q0, q2 \n" /* B,G */ \ 9424b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 943522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 
944522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com#define RGB555TOARGB \ 945522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ 946522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ 947522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ 948522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ 949522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ 950522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ 951522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vorr.u8 d0, d0, d4 \n" /* B */ \ 952522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ 953522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vorr.u8 d2, d1, d5 \n" /* R */ \ 954522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vorr.u8 d1, d4, d6 \n" /* G */ 955522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 9564b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, 9574b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com int pix) { 9584b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com asm volatile ( 9594b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vmov.u8 d3, #255 \n" // Alpha 960c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 9614b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "1: \n" 9624b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. 
9634b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 9644b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ARGB1555TOARGB 9654b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 9664b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "bgt 1b \n" 9674b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : "+r"(src_argb1555), // %0 9684b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(dst_argb), // %1 9694b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(pix) // %2 9704b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : 9718f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 9724b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ); 9734b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com} 9744b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 9754b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com#define ARGB4444TOARGB \ 9764b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ 9774b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ 9784b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ 9794b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ 9804b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ 9814b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ 9824b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ 9834b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ 
9844b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 9854b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.comvoid ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, 9864b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com int pix) { 9874b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com asm volatile ( 9884b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vmov.u8 d3, #255 \n" // Alpha 989c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 9904b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "1: \n" 9914b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 9924b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 9934b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ARGB4444TOARGB 9944b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
9954b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "bgt 1b \n" 9964b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : "+r"(src_argb4444), // %0 9974b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(dst_argb), // %1 9984b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com "+r"(pix) // %2 9994b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com : 10008f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2" // Clobber List 10014b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com ); 10024b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com} 10034b4a32cb17596321ccee7ba3179bcd3ad6e2c81efbarchard@google.com 100464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.comvoid ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) { 100564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com asm volatile ( 1006c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 100764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "1: \n" 1008275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 100982069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 10104807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24. 
101164961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "bgt 1b \n" 101264961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com : "+r"(src_argb), // %0 101364961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "+r"(dst_rgb24), // %1 101464961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com "+r"(pix) // %2 101564961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com : 10168f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 101764961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com ); 101864961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com} 101964961c01b200a77b4af9629bf1215358ec056f0afbarchard@google.com 10205808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.comvoid ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { 10215808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com asm volatile ( 1022c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 10235808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "1: \n" 1024275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. 10254807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 1026275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vswp.u8 d1, d3 \n" // swap R, B 1027275913410d879df91edd1648dfee19207870264bfbarchard@google.com "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. 
10285808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "bgt 1b \n" 10295808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com : "+r"(src_argb), // %0 10305808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "+r"(dst_raw), // %1 10315808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com "+r"(pix) // %2 10325808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com : 10338f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List 10345808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com ); 10355808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com} 10365808cb22ce60bf963e15bfb1a0958cb362f5efbcfbarchard@google.com 1037dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { 1038dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1039c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1040dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 10412c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. 104282069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #16 \n" // 16 processed per loop. 10432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y. 
1044dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1045dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_yuy2), // %0 1046dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_y), // %1 1047dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %2 1048dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 10498f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 1050dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1051dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1052dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1053dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { 1054dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1055c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1056dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 10572c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. 105882069e7a7108a956d0d78f7e923851925b2827d4fbarchard@google.com "subs %2, %2, #16 \n" // 16 processed per loop. 10592c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y. 
1060dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1061dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_uyvy), // %0 1062dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_y), // %1 1063dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %2 1064dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 10658f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 1066dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1067dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1068dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1069dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, 1070dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com int pix) { 1071dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1072c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1073dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 1074dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. 10754807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 10762c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // store 8 U. 10772c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%2]! \n" // store 8 V. 
1078dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1079dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_yuy2), // %0 1080dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_u), // %1 1081dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_v), // %2 1082dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %3 1083dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 10848f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List 1085dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1086dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1087dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1088dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, 1089dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com int pix) { 1090dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1091c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1092dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 1093dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. 10944807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. 10952c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 U. 10962c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d2}, [%2]! \n" // store 8 V. 
1097dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1098dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : "+r"(src_uyvy), // %0 1099dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_u), // %1 1100dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(dst_v), // %2 1101dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(pix) // %3 1102dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 11038f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List 1104dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1105dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1106dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1107dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, 1108dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1109dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1110dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "add %1, %0, %1 \n" // stride + src_yuy2 1111c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1112dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 1113dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. 11144807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 1115dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. 
1116dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d1, d1, d5 \n" // average rows of U 1117dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d3, d3, d7 \n" // average rows of V 11182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 U. 11192c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d3}, [%3]! \n" // store 8 V. 1120dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1121cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com : "+r"(src_yuy2), // %0 1122dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(stride_yuy2), // %1 1123cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_u), // %2 1124cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_v), // %3 1125cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(pix) // %4 1126dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 11278f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List 1128dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1129dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 1130dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com 1131dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.comvoid UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, 1132dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1133dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com asm volatile ( 1134dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "add %1, %0, %1 \n" // stride + src_uyvy 1135c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1136dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "1: \n" 1137dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d0, d1, 
d2, d3}, [%0]! \n" // load 16 pixels of UYVY. 11384807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. 1139dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY. 1140dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d0, d0, d4 \n" // average rows of U 1141dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "vrhadd.u8 d2, d2, d6 \n" // average rows of V 11422c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 U. 11432c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d2}, [%3]! \n" // store 8 V. 1144dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "bgt 1b \n" 1145cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com : "+r"(src_uyvy), // %0 1146dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com "+r"(stride_uyvy), // %1 1147cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_u), // %2 1148cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(dst_v), // %3 1149cc206dee46f09369d5b4dad79ec662fd4289959cfbarchard@google.com "+r"(pix) // %4 1150dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com : 11518f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List 1152dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com ); 1153dddf94c343c9a6413468a334e9fd965e4b1b3eb7fbarchard@google.com} 11544807dea4e731cef5038892474458be07c346fbf3fbarchard@google.com 1155e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.comvoid HalfRow_NEON(const uint8* src_uv, int src_uv_stride, 1156e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com uint8* dst_uv, int pix) { 1157e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com asm volatile ( 1158e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com // change the 
stride to row 2 pointer 1159e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "add %1, %0 \n" 1160f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com "1: \n" 11612c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels. 1162e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop 11632c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels. 1164e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "vrhadd.u8 q0, q1 \n" // average row 1 and 2 11652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%2]! \n" 1166e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com "bgt 1b \n" 11673e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_uv), // %0 11683e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_uv_stride), // %1 11693e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_uv), // %2 11703e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(pix) // %3 11713e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 11728f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 11733e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 1174e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com} 1175e91bdaca3674830570cbb2aaab6d5c939f56dee4fbarchard@google.com 11768d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com// Select 2 channels from ARGB on alternating pixels. e.g. 
BGBGBGBG 11771096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, 11781096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com uint32 selector, int pix) { 1179f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com asm volatile ( 1180c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vmov.u32 d6[0], %3 \n" // selector 1181f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com "1: \n" 11822c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels. 1183c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 1184c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels 1185c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels 1186c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vtrn.u32 d4, d5 \n" // combine 8 pixels 11872c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d4}, [%1]! \n" // store 8. 1188f658aebb7c2e072ad5b94ad82b94b8299649bdf2fbarchard@google.com "bgt 1b \n" 11894f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com : "+r"(src_argb), // %0 11904f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com "+r"(dst_bayer), // %1 11914f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com "+r"(pix) // %2 11924f4c8b544cf0d4c30cca0baaa19d8be29c79ed2ffbarchard@google.com : "r"(selector) // %3 1193c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List 11941096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com ); 11951096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com} 11961096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com 119708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com// Select G channels from ARGB. e.g. 
GGGGGGGG 119808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.comvoid ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, 119908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com uint32 /*selector*/, int pix) { 120008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com asm volatile ( 120108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "1: \n" 120208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels. 120308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop 120408b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "vst1.8 {d1}, [%1]! \n" // store 8 G's. 120508b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "bgt 1b \n" 120608b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com : "+r"(src_argb), // %0 120708b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "+r"(dst_bayer), // %1 120808b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com "+r"(pix) // %2 120908b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com : 121008b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 121108b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com ); 121208b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com} 121308b24a4232600b2f9f21584f34f6868d8c15c215fbarchard@google.com 12141096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
12151096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.comvoid ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, 12161096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com const uint8* shuffler, int pix) { 12171096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com asm volatile ( 12182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q2}, [%3] \n" // shuffler 12191096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "1: \n" 12202c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 4 pixels. 12211096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "subs %2, %2, #4 \n" // 4 processed per loop 12221096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels 12231096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels 12242c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q1}, [%1]! \n" // store 4. 
12251096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "bgt 1b \n" 12261096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com : "+r"(src_argb), // %0 12271096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "+r"(dst_argb), // %1 12281096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com "+r"(pix) // %2 12291096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com : "r"(shuffler) // %3 12301096543eaa1e596a93ba5d3863e637dc489e32ccfbarchard@google.com : "cc", "memory", "q0", "q1", "q2" // Clobber List 12313e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com ); 12328d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com} 12338d37dd5c205216e0ad13c5091061908cb981c5f9fbarchard@google.com 12349de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToYUY2Row_NEON(const uint8* src_y, 12359de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 12369de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 12379de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_yuy2, int width) { 12389de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com asm volatile ( 1239c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 12409de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "1: \n" 12419de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys 12429de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d1}, [%1]! \n" // load 8 Us 12439de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d3}, [%2]! \n" // load 8 Vs 12449de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels 12452c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels. 
12469de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "bgt 1b \n" 12473e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_y), // %0 12483e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_u), // %1 12493e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_v), // %2 12503e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_yuy2), // %3 12513e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %4 12523e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 12533e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" 12549de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com ); 12559de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com} 12569de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 12579de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.comvoid I422ToUYVYRow_NEON(const uint8* src_y, 12589de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_u, 12599de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com const uint8* src_v, 12609de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com uint8* dst_uyvy, int width) { 12619de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com asm volatile ( 1262c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 12639de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "1: \n" 12649de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys 12659de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d0}, [%1]! \n" // load 8 Us 12669de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "vld1.8 {d2}, [%2]! \n" // load 8 Vs 12679de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "subs %4, %4, #16 \n" // 16 pixels 12682c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%3]! 
\n" // Store 8 UYVY/16 pixels. 12699de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com "bgt 1b \n" 12703e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "+r"(src_y), // %0 12713e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_u), // %1 12723e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(src_v), // %2 12733e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(dst_uyvy), // %3 12743e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com "+r"(width) // %4 12753e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : 12763e46444727a0524d2f1d81117e0b1404148ac910fbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3" 12779de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com ); 12789de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com} 12799de8867ab636128ff667fdf50d3ede83e861d97afbarchard@google.com 12801bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) { 12811bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com asm volatile ( 1282c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 12831bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "1: \n" 128411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 12851bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 128611c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTORGB565 12871bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565. 
12881bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "bgt 1b \n" 12891bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : "+r"(src_argb), // %0 12901bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(dst_rgb565), // %1 12911bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(pix) // %2 12921bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : 12938f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q8", "q9", "q10", "q11" 12941bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com ); 12951bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com} 12961bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com 12971bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.comvoid ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, 12981bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com int pix) { 12991bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com asm volatile ( 1300c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 13011bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "1: \n" 130211c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 13031bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 130411c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB1555 13051bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555. 
13061bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "bgt 1b \n" 13071bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : "+r"(src_argb), // %0 13081bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(dst_argb1555), // %1 13091bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com "+r"(pix) // %2 13101bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com : 13118f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q8", "q9", "q10", "q11" 13121bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com ); 13131bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com} 13141bdcc4c3e3d9207252b57b0b7f99b4113caa7d34fbarchard@google.com 1315bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.comvoid ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, 1316bb6bddc9fb4aea694ef26d7761d9fbcba8f5b6c1fbarchard@google.com int pix) { 1317c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com asm volatile ( 1318c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. 1319c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1320c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "1: \n" 132111c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. 1322c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 132311c6d32afc1dd86ef530a4646585a659a7cafd9ffbarchard@google.com ARGBTOARGB4444 1324c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. 
1325c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "bgt 1b \n" 1326bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_argb), // %0 1327c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com "+r"(dst_argb4444), // %1 1328bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 1329c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com : 13308f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q8", "q9", "q10", "q11" 1331c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com ); 1332c389e8e3b14bcdbb4dce7a5667abba98a65cf1b6fbarchard@google.com} 13330908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com 13340908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.comvoid ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { 13350908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com asm volatile ( 1336bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 1337bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 1338bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 1339bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 1340c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1341bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 13421dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 1343bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
1344bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 1345bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 1346bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 1347bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 1348bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d27 \n" 1349bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 1350bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 1351bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_argb), // %0 1352bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 1353bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 1354bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 13558f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 1356bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 1357bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 1358bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 1359cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.comvoid ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { 1360cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com asm volatile ( 1361050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient 1362050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient 1363050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient 1364c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 
1365cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "1: \n" 1366cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 1367cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 1368cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 1369cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 1370cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 1371050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y 1372cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 1373cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "bgt 1b \n" 1374cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com : "+r"(src_argb), // %0 1375cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "+r"(dst_y), // %1 1376cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com "+r"(pix) // %2 1377cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com : 1378cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 1379cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com ); 1380cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com} 1381cfaa66c04154eec91951e2596cfe55eb6f2f749efbarchard@google.com 1382c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 8x1 pixels. 
1383c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.comvoid ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1384c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com int pix) { 1385c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com asm volatile ( 1386c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient 1387c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient 1388c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient 1389c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient 1390c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient 1391c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1392c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1393c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "1: \n" 1394c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 1395c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 
1396c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 1397c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q2, d1, d25 \n" // G 1398c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q2, d2, d26 \n" // R 1399c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned 1400c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 1401c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmull.u8 q3, d2, d24 \n" // R 1402c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q3, d1, d28 \n" // G 1403c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vmlsl.u8 q3, d0, d27 \n" // B 1404c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned 1405c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 1406c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U 1407c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V 1408c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 1409c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. 1410c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
1411c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "bgt 1b \n" 1412c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com : "+r"(src_argb), // %0 1413c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "+r"(dst_u), // %1 1414c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "+r"(dst_v), // %2 1415c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com "+r"(pix) // %3 1416c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com : 14178f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" 1418c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com ); 1419c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com} 1420c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1421c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 1422c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.comvoid ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1423c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com int pix) { 1424c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com asm volatile ( 1425c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 1426c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 1427c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 1428c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 142976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 1430c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 
1431c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1432c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "1: \n" 1433c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 1434c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 1435c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1436c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 1437c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 1438c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 1439c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1440c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop. 1441c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmul.s16 q8, q0, q10 \n" // B 1442c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q8, q1, q11 \n" // G 1443c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q8, q2, q12 \n" // R 1444c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 1445c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1446c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmul.s16 q9, q2, q10 \n" // R 1447c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q9, q1, q14 \n" // G 1448c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vmls.s16 q9, q0, q13 \n" // B 1449c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 1450c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1451c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 
d0, q8, #8 \n" // 16 bit to 8 bit U 1452c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 1453c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com 1454c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. 1455c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 1456c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "bgt 1b \n" 1457c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com : "+r"(src_argb), // %0 1458c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "+r"(dst_u), // %1 1459c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "+r"(dst_v), // %2 1460c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "+r"(pix) // %3 1461c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com : 14628f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 1463c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 1464c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com ); 1465c4f443f8fe38f6bd447a1393ad63cf6a09887afcfbarchard@google.com} 1466c673f426de5fd73ee9214678fa5356328cb1fbcbfbarchard@google.com 146776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32. 
146876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.comvoid ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 146976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com int pix) { 147076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com asm volatile ( 147106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 147206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 147306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 147406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 147506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 147676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1477c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 147876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "1: \n" 147976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 148076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 148176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 148276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 148376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 148476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels. 148576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%0]! 
\n" // load last 8 ARGB pixels. 148676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts. 148776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts. 148876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts. 148906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 149076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts. 149176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d1, d8, d9 \n" // B 149276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts. 149376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d3, d10, d11 \n" // G 149476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts. 149576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vpadd.u16 d5, d12, d13 \n" // R 149606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 149706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 149806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 149906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 150006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 150176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "subs %3, %3, #32 \n" // 32 processed per loop. 
150276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmul.s16 q8, q0, q10 \n" // B 150376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q8, q1, q11 \n" // G 150476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q8, q2, q12 \n" // R 150576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 150676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmul.s16 q9, q2, q10 \n" // R 150776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q9, q1, q14 \n" // G 150876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmls.s16 q9, q0, q13 \n" // B 150976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 151076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 151176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 151276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. 151376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. 
151476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "bgt 1b \n" 151576e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : "+r"(src_argb), // %0 151676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "+r"(dst_u), // %1 151776e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "+r"(dst_v), // %2 151876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "+r"(pix) // %3 151976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : 15208f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 152176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 152276e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com ); 152376e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com} 152476e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com 1525dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
// Assembly fragment shared by the *ToUVRow_NEON functions.  QB, QG and QR
// name the q registers holding the blue, green and red channel values as
// 16 bit lanes.  The fragment reads its weights from q10-q14 and its bias
// from q15, uses q8/q9 as scratch, and leaves 8 U bytes in d0 and 8 V bytes
// in d1.
#define RGBTOUV(QB, QG, QR) \
    "vmul.s16 q8, " #QB ", q10 \n"  /* U  = B * q10 */ \
    "vmls.s16 q8, " #QG ", q11 \n"  /* U -= G * q11 */ \
    "vmls.s16 q8, " #QR ", q12 \n"  /* U -= R * q12 */ \
    "vadd.u16 q8, q8, q15 \n"  /* add bias -> unsigned */ \
    "vmul.s16 q9, " #QR ", q10 \n"  /* V  = R * q10 */ \
    "vmls.s16 q9, " #QG ", q14 \n"  /* V -= G * q14 */ \
    "vmls.s16 q9, " #QB ", q13 \n"  /* V -= B * q13 */ \
    "vadd.u16 q9, q9, q15 \n"  /* add bias -> unsigned */ \
    "vqshrn.u16 d0, q8, #8 \n"  /* narrow U to 8 bits */ \
    "vqshrn.u16 d1, q9, #8 \n"  /* narrow V to 8 bits */

// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr.
1539dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.comvoid ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, 1540dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 154176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com asm volatile ( 1542dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 154306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 154406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 154506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 154606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 154706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 154876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1549c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 155076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "1: \n" 1551dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 1552dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 1553dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 1554dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 1555dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 1556dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. 
1557dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. 1558dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 1559dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 1560dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 156106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 156206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 156306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 156406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 156506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 1566dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 156795730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q0, q1, q2) 1568dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 1569dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
157076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "bgt 1b \n" 157176e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : "+r"(src_argb), // %0 1572dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(src_stride_argb), // %1 1573dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(dst_u), // %2 1574dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(dst_v), // %3 1575dd2d512e5afad7536e3a010c0193ca1b43c14985fbarchard@google.com "+r"(pix) // %4 157676e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com : 15778f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 157876e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 157976e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com ); 158076e851792cbdeb41c6e56be26e3a9c4d9fd1771cfbarchard@google.com} 158195730719503137a7db61a105bec02220f9ed159efbarchard@google.com 1582050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// TODO(fbarchard): Subsample match C code. 
1583050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.comvoid ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, 1584050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1585050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com asm volatile ( 1586050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 158706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient 158806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient 158906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient 159006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient 159106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient 1592050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1593c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1594050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "1: \n" 1595050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 1596050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. 1597050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 1598050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 1599050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 1600050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! 
\n" // load 8 more ARGB pixels. 1601050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. 1602050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 1603050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 1604050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 160506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 160606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 160706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 160806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 160906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 1610050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 1611050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com RGBTOUV(q0, q1, q2) 1612050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 1613050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
1614050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "bgt 1b \n" 1615050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com : "+r"(src_argb), // %0 1616050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "+r"(src_stride_argb), // %1 1617050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "+r"(dst_u), // %2 1618050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "+r"(dst_v), // %3 1619050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "+r"(pix) // %4 1620050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com : 1621050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 1622050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 1623050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com ); 1624050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com} 1625050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com 162695730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, 162795730719503137a7db61a105bec02220f9ed159efbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 162895730719503137a7db61a105bec02220f9ed159efbarchard@google.com asm volatile ( 162995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_bgra 163006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 163106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 163206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 163306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 163406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 
q14, #94 / 2 \n" // VG -0.7344 coefficient 163595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1636c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 163795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "1: \n" 163895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. 163995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. 164095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. 164195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. 164295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. 164395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. 164495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. 164595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. 164695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. 164795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts. 
164806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 164906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" // 2x average 165006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 165106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q3, q3, #1 \n" 165206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 165395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 165495730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q3, q2, q1) 165595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 165695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 165795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "bgt 1b \n" 165895730719503137a7db61a105bec02220f9ed159efbarchard@google.com : "+r"(src_bgra), // %0 165995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(src_stride_bgra), // %1 166095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_u), // %2 166195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_v), // %3 166295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(pix) // %4 166395730719503137a7db61a105bec02220f9ed159efbarchard@google.com : 16648f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 166595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 166695730719503137a7db61a105bec02220f9ed159efbarchard@google.com ); 166795730719503137a7db61a105bec02220f9ed159efbarchard@google.com} 166895730719503137a7db61a105bec02220f9ed159efbarchard@google.com 166995730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, 
167095730719503137a7db61a105bec02220f9ed159efbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 167195730719503137a7db61a105bec02220f9ed159efbarchard@google.com asm volatile ( 167295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_abgr 167306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 167406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 167506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 167606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 167706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 167895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1679c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 168095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "1: \n" 168195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. 168295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. 168395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. 168495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 168595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. 168695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. 168795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! 
\n" // load last 8 ABGR pixels. 168895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. 168995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 169095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. 169106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 169206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 169306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 169406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 169506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 169695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 169795730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q2, q1, q0) 169895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 169995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
170095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "bgt 1b \n" 170195730719503137a7db61a105bec02220f9ed159efbarchard@google.com : "+r"(src_abgr), // %0 170295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(src_stride_abgr), // %1 170395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_u), // %2 170495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_v), // %3 170595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(pix) // %4 170695730719503137a7db61a105bec02220f9ed159efbarchard@google.com : 17078f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 170895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 170995730719503137a7db61a105bec02220f9ed159efbarchard@google.com ); 171095730719503137a7db61a105bec02220f9ed159efbarchard@google.com} 171195730719503137a7db61a105bec02220f9ed159efbarchard@google.com 171295730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, 171395730719503137a7db61a105bec02220f9ed159efbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 171495730719503137a7db61a105bec02220f9ed159efbarchard@google.com asm volatile ( 171595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_rgba 171606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 171706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 171806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 171906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 172006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 
q14, #94 / 2 \n" // VG -0.7344 coefficient 172195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1722c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 172395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "1: \n" 172495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. 172595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. 172695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. 172795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. 172895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. 172995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. 173095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. 173195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. 173295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. 173395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts. 
173406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 173506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 173606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 173706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 173806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 173995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 174095730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q0, q1, q2) 174195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 174295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 174395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "bgt 1b \n" 174495730719503137a7db61a105bec02220f9ed159efbarchard@google.com : "+r"(src_rgba), // %0 174595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(src_stride_rgba), // %1 174695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_u), // %2 174795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_v), // %3 174895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(pix) // %4 174995730719503137a7db61a105bec02220f9ed159efbarchard@google.com : 17508f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 175195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 175295730719503137a7db61a105bec02220f9ed159efbarchard@google.com ); 175395730719503137a7db61a105bec02220f9ed159efbarchard@google.com} 175495730719503137a7db61a105bec02220f9ed159efbarchard@google.com 175595730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, 
175695730719503137a7db61a105bec02220f9ed159efbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 175795730719503137a7db61a105bec02220f9ed159efbarchard@google.com asm volatile ( 175895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_rgb24 175906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 176006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 176106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 176206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 176306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 176495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1765c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 176695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "1: \n" 176795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels. 176895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels. 176995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. 177095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 177195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. 177295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels. 177395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels. 
177495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. 177595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 177695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. 177706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 177806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 177906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 178006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 178106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 178295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 178395730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q0, q1, q2) 178495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 178595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
178695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "bgt 1b \n" 178795730719503137a7db61a105bec02220f9ed159efbarchard@google.com : "+r"(src_rgb24), // %0 178895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(src_stride_rgb24), // %1 178995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_u), // %2 179095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_v), // %3 179195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(pix) // %4 179295730719503137a7db61a105bec02220f9ed159efbarchard@google.com : 17938f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 179495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 179595730719503137a7db61a105bec02220f9ed159efbarchard@google.com ); 179695730719503137a7db61a105bec02220f9ed159efbarchard@google.com} 179795730719503137a7db61a105bec02220f9ed159efbarchard@google.com 179895730719503137a7db61a105bec02220f9ed159efbarchard@google.comvoid RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, 179995730719503137a7db61a105bec02220f9ed159efbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 180095730719503137a7db61a105bec02220f9ed159efbarchard@google.com asm volatile ( 180195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_raw 180206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 180306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 180406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 180506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 180606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 
q14, #94 / 2 \n" // VG -0.7344 coefficient 180795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1808c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 180995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "1: \n" 181095730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels. 181195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels. 181295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. 181395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 181495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. 181595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels. 181695730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels. 181795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. 181895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. 181995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. 
182006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 182106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q0, q0, #1 \n" // 2x average 182206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q1, q1, #1 \n" 182306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q2, q2, #1 \n" 182406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 182595730719503137a7db61a105bec02220f9ed159efbarchard@google.com "subs %4, %4, #16 \n" // 32 processed per loop. 182695730719503137a7db61a105bec02220f9ed159efbarchard@google.com RGBTOUV(q2, q1, q0) 182795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 182895730719503137a7db61a105bec02220f9ed159efbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 182995730719503137a7db61a105bec02220f9ed159efbarchard@google.com "bgt 1b \n" 183095730719503137a7db61a105bec02220f9ed159efbarchard@google.com : "+r"(src_raw), // %0 183195730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(src_stride_raw), // %1 183295730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_u), // %2 183395730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(dst_v), // %3 183495730719503137a7db61a105bec02220f9ed159efbarchard@google.com "+r"(pix) // %4 183595730719503137a7db61a105bec02220f9ed159efbarchard@google.com : 18368f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 183795730719503137a7db61a105bec02220f9ed159efbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 183895730719503137a7db61a105bec02220f9ed159efbarchard@google.com ); 183995730719503137a7db61a105bec02220f9ed159efbarchard@google.com} 184095730719503137a7db61a105bec02220f9ed159efbarchard@google.com 1841f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
1842f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.comvoid RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, 1843f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1844f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com asm volatile ( 1845f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 184606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 184706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 184806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 184906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 185006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 1851f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1852c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1853f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "1: \n" 1854f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 1855f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com RGB565TOARGB 1856f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 1857f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 1858f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 1859f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels. 
1860f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com RGB565TOARGB 1861f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 1862f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 1863f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 1864f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com 1865f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels. 1866f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com RGB565TOARGB 1867f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 1868f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 1869f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 1870f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels. 1871f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com RGB565TOARGB 1872f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 1873f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 1874f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 
1875f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com 187606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q4, q4, #1 \n" // 2x average 187706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q5, q5, #1 \n" 187806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q6, q6, #1 \n" 187906ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 1880f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "subs %4, %4, #16 \n" // 16 processed per loop. 1881f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmul.s16 q8, q4, q10 \n" // B 1882f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmls.s16 q8, q5, q11 \n" // G 1883f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmls.s16 q8, q6, q12 \n" // R 1884f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 1885f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmul.s16 q9, q6, q10 \n" // R 1886f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmls.s16 q9, q5, q14 \n" // G 1887f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vmls.s16 q9, q4, q13 \n" // B 1888f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 1889f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 1890f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 1891f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 1892f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
1893f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "bgt 1b \n" 1894f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com : "+r"(src_rgb565), // %0 1895f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "+r"(src_stride_rgb565), // %1 1896f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "+r"(dst_u), // %2 1897f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "+r"(dst_v), // %3 1898f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "+r"(pix) // %4 1899f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com : 19008f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 1901f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 1902f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com ); 1903f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com} 1904522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 1905522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
1906522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, 1907522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1908522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com asm volatile ( 1909522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 191006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 191106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 191206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 191306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 191406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 1915522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1916c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1917522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "1: \n" 1918522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. 1919522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com RGB555TOARGB 1920522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 1921522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 1922522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 1923522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels. 
1924522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com RGB555TOARGB 1925522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 1926522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 1927522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 1928522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 1929522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels. 1930522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com RGB555TOARGB 1931522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 1932522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 1933522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 1934522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels. 1935522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com RGB555TOARGB 1936522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 1937522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 1938522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 
1939522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 194006ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q4, q4, #1 \n" // 2x average 194106ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q5, q5, #1 \n" 194206ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q6, q6, #1 \n" 194306ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 1944522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "subs %4, %4, #16 \n" // 16 processed per loop. 1945522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmul.s16 q8, q4, q10 \n" // B 1946522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q8, q5, q11 \n" // G 1947522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q8, q6, q12 \n" // R 1948522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 1949522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmul.s16 q9, q6, q10 \n" // R 1950522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q9, q5, q14 \n" // G 1951522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q9, q4, q13 \n" // B 1952522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 1953522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 1954522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 1955522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 1956522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
1957522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "bgt 1b \n" 1958522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com : "+r"(src_argb1555), // %0 1959522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(src_stride_argb1555), // %1 1960522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(dst_u), // %2 1961522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(dst_v), // %3 1962522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(pix) // %4 1963522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com : 19648f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 1965522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 1966522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ); 1967522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com} 1968522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 1969522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
1970522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.comvoid ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, 1971522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com uint8* dst_u, uint8* dst_v, int pix) { 1972522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com asm volatile ( 1973522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "add %1, %0, %1 \n" // src_stride + src_argb 197406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 197506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 197606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 197706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 197806ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 1979522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmov.u16 q15, #0x8080 \n" // 128.5 1980c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 1981522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "1: \n" 1982522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 1983522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 1984522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 1985522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 1986522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 1987522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. 
1988522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 1989522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 1990522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 1991522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 1992522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 1993522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. 1994522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 1995522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. 1996522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 1997522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. 1998522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. 1999522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ARGB4444TOARGB 2000522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. 2001522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 2002522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. 
2003522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com 200406ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q4, q4, #1 \n" // 2x average 200506ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q5, q5, #1 \n" 200606ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com "vrshr.u16 q6, q6, #1 \n" 200706ed625808a8e5334256df7c032800f6eef1719ffbarchard@google.com 2008522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "subs %4, %4, #16 \n" // 16 processed per loop. 2009522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmul.s16 q8, q4, q10 \n" // B 2010522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q8, q5, q11 \n" // G 2011522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q8, q6, q12 \n" // R 2012522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned 2013522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmul.s16 q9, q6, q10 \n" // R 2014522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q9, q5, q14 \n" // G 2015522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vmls.s16 q9, q4, q13 \n" // B 2016522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned 2017522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U 2018522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V 2019522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 2020522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
2021522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "bgt 1b \n" 2022522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com : "+r"(src_argb4444), // %0 2023522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(src_stride_argb4444), // %1 2024522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(dst_u), // %2 2025522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(dst_v), // %3 2026522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "+r"(pix) // %4 2027522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com : 20288f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", 2029522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 2030522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com ); 2031522d757c9257056020f058acc5a7c63e401ce019fbarchard@google.com} 2032f1daa3db65a41d5d0766c8309ce5a2ef43bf8bb1fbarchard@google.com 2033bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { 2034bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2035bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 2036bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 2037bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 2038bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 2039c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2040bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 20411dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 
2042bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2043bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com RGB565TOARGB 2044bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 2045bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 2046bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 2047bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 2048bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d27 \n" 2049bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 2050bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2051bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgb565), // %0 2052bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2053bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2054bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 20558f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 2056bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2057bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2058bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 20591dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) { 20601dee6250936424ced8722329369da75935d61580fbarchard@google.com asm volatile ( 20611dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 20621dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 
20631dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 20641dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 2065c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 20661dee6250936424ced8722329369da75935d61580fbarchard@google.com "1: \n" 20671dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. 20681dee6250936424ced8722329369da75935d61580fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 20691dee6250936424ced8722329369da75935d61580fbarchard@google.com ARGB1555TOARGB 20701dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 20711dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 20721dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 20731dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 20741dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqadd.u8 d0, d27 \n" 20751dee6250936424ced8722329369da75935d61580fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
20761dee6250936424ced8722329369da75935d61580fbarchard@google.com "bgt 1b \n" 20771dee6250936424ced8722329369da75935d61580fbarchard@google.com : "+r"(src_argb1555), // %0 20781dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(dst_y), // %1 20791dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(pix) // %2 20801dee6250936424ced8722329369da75935d61580fbarchard@google.com : 20818f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 20821dee6250936424ced8722329369da75935d61580fbarchard@google.com ); 20831dee6250936424ced8722329369da75935d61580fbarchard@google.com} 20841dee6250936424ced8722329369da75935d61580fbarchard@google.com 20851dee6250936424ced8722329369da75935d61580fbarchard@google.comvoid ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) { 20861dee6250936424ced8722329369da75935d61580fbarchard@google.com asm volatile ( 20871dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient 20881dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient 20891dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient 20901dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmov.u8 d27, #16 \n" // Add 16 constant 2091c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 20921dee6250936424ced8722329369da75935d61580fbarchard@google.com "1: \n" 20931dee6250936424ced8722329369da75935d61580fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. 20941dee6250936424ced8722329369da75935d61580fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
20951dee6250936424ced8722329369da75935d61580fbarchard@google.com ARGB4444TOARGB 20961dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 20971dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 20981dee6250936424ced8722329369da75935d61580fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 20991dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y 21001dee6250936424ced8722329369da75935d61580fbarchard@google.com "vqadd.u8 d0, d27 \n" 21011dee6250936424ced8722329369da75935d61580fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 21021dee6250936424ced8722329369da75935d61580fbarchard@google.com "bgt 1b \n" 21031dee6250936424ced8722329369da75935d61580fbarchard@google.com : "+r"(src_argb4444), // %0 21041dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(dst_y), // %1 21051dee6250936424ced8722329369da75935d61580fbarchard@google.com "+r"(pix) // %2 21061dee6250936424ced8722329369da75935d61580fbarchard@google.com : 21078f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" 21081dee6250936424ced8722329369da75935d61580fbarchard@google.com ); 21091dee6250936424ced8722329369da75935d61580fbarchard@google.com} 2110bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2111bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { 2112bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2113bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 2114bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2115bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 
2116bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2117c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2118bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 2119bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. 2120bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2121bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d1, d4 \n" // R 2122bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d5 \n" // G 2123bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d3, d6 \n" // B 2124bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2125bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 2126bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
2127bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2128bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_bgra), // %0 2129bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2130bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2131bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 21328f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2133bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2134bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2135bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2136bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { 2137bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2138bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 2139bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2140bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 2141bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2142c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2143bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 2144bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. 2145bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
2146bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d0, d4 \n" // R 2147bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d1, d5 \n" // G 2148bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d6 \n" // B 2149bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2150bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 2151bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 2152bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2153bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_abgr), // %0 2154bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2155bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2156bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 21578f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2158bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2159bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2160bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2161bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { 2162bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 21630908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient 21640908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 21650908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient 21660908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 
2167c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 21680908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "1: \n" 2169bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. 2170bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2171bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d1, d4 \n" // B 2172bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d5 \n" // G 2173bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d3, d6 \n" // R 2174bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2175bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 2176bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 2177bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2178bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgba), // %0 2179bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2180bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2181bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 21828f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2183bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2184bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 2185bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2186bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { 2187bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 
2188bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient 2189bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2190bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient 2191bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2192c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2193bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 2194bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. 21950908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 21960908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmull.u8 q8, d0, d4 \n" // B 21970908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmlal.u8 q8, d1, d5 \n" // G 21980908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vmlal.u8 q8, d2, d6 \n" // R 21990908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 22000908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vqadd.u8 d0, d7 \n" 22010908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
22020908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "bgt 1b \n" 2203bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_rgb24), // %0 22040908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "+r"(dst_y), // %1 22050908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com "+r"(pix) // %2 22060908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com : 22078f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 22080908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com ); 22090908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com} 2210bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com 2211bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.comvoid RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { 2212bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com asm volatile ( 2213bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient 2214bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient 2215bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient 2216bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmov.u8 d7, #16 \n" // Add 16 constant 2217c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2218bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "1: \n" 2219bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. 2220bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 
2221bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmull.u8 q8, d0, d4 \n" // B 2222bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d1, d5 \n" // G 2223bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vmlal.u8 q8, d2, d6 \n" // R 2224bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y 2225bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vqadd.u8 d0, d7 \n" 2226bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 2227bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "bgt 1b \n" 2228bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : "+r"(src_raw), // %0 2229bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(dst_y), // %1 2230bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com "+r"(pix) // %2 2231bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com : 22328f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2233bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com ); 2234bdf7cb591452611090922e690d5104a7d8c6b1e5fbarchard@google.com} 22350908a701e90dc15d973784f6245131aa65a66f52fbarchard@google.com 2236b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com// Bilinear filter 16x2 -> 16x1 2237b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.comvoid InterpolateRow_NEON(uint8* dst_ptr, 2238b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com const uint8* src_ptr, ptrdiff_t src_stride, 2239b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com int dst_width, int source_y_fraction) { 2240b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com asm volatile ( 22418f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #0 \n" 22428f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 100f \n" 
22438f506332af217882648eed166a257557855b9fdbfbarchard@google.com "add %2, %1 \n" 22448f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #64 \n" 22458f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 75f \n" 22468f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #128 \n" 22478f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 50f \n" 22488f506332af217882648eed166a257557855b9fdbfbarchard@google.com "cmp %4, #192 \n" 22498f506332af217882648eed166a257557855b9fdbfbarchard@google.com "beq 25f \n" 22508f506332af217882648eed166a257557855b9fdbfbarchard@google.com 22518f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vdup.8 d5, %4 \n" 22528f506332af217882648eed166a257557855b9fdbfbarchard@google.com "rsb %4, #256 \n" 22538f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vdup.8 d4, %4 \n" 2254b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // General purpose row blend. 2255b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "1: \n" 22562c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 22572c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 2258b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 22598f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmull.u8 q13, d0, d4 \n" 22608f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmull.u8 q14, d1, d4 \n" 22618f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmlal.u8 q13, d2, d5 \n" 22628f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmlal.u8 q14, d3, d5 \n" 22638f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrshrn.u16 d0, q13, #8 \n" 22648f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrshrn.u16 d1, q14, #8 \n" 22652c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" 22668f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 1b \n" 22678f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2268b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2269b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 25 / 75. 2270b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "25: \n" 22712c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 22722c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 2273b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 22748f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 22758f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 22762c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 22778f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 25b \n" 22788f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2279b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2280b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 50 / 50. 2281b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "50: \n" 22822c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 22832c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%2]! \n" 2284b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 22858f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 22862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" 22878f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 50b \n" 22888f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2289b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2290b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 75 / 25. 2291b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "75: \n" 22922c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q1}, [%1]! \n" 22932c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%2]! \n" 2294b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 22958f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 22968f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vrhadd.u8 q0, q1 \n" 22972c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! \n" 22988f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 75b \n" 22998f506332af217882648eed166a257557855b9fdbfbarchard@google.com "b 99f \n" 2300b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2301b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com // Blend 100 / 0 - Copy row unchanged. 2302b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "100: \n" 23032c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {q0}, [%1]! \n" 2304b911428afd3994f47e5780a80c876d05d1d4c590fbarchard@google.com "subs %3, %3, #16 \n" 23052c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {q0}, [%0]! 
\n" 23068f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bgt 100b \n" 2307b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com 2308b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "99: \n" 2309b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com : "+r"(dst_ptr), // %0 2310b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(src_ptr), // %1 2311b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(src_stride), // %2 2312b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(dst_width), // %3 2313b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com "+r"(source_y_fraction) // %4 2314b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com : 23158f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" 2316b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com ); 2317b5491759b45de37df781d4408a0c46abf6d4ae08fbarchard@google.com} 23188f506332af217882648eed166a257557855b9fdbfbarchard@google.com 23198f506332af217882648eed166a257557855b9fdbfbarchard@google.com// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr 23208f506332af217882648eed166a257557855b9fdbfbarchard@google.comvoid ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 23218f506332af217882648eed166a257557855b9fdbfbarchard@google.com uint8* dst_argb, int width) { 23228f506332af217882648eed166a257557855b9fdbfbarchard@google.com asm volatile ( 23238f506332af217882648eed166a257557855b9fdbfbarchard@google.com "subs %3, #8 \n" 23248f506332af217882648eed166a257557855b9fdbfbarchard@google.com "blt 89f \n" 23258f506332af217882648eed166a257557855b9fdbfbarchard@google.com // Blend 8 pixels. 23268f506332af217882648eed166a257557855b9fdbfbarchard@google.com "8: \n" 23278f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0. 
23288f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. 23298f506332af217882648eed166a257557855b9fdbfbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 2330d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q10, d4, d3 \n" // db * a 2331d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q11, d5, d3 \n" // dg * a 2332d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q12, d6, d3 \n" // dr * a 2333d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 2334d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 2335d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 2336d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 2337d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 2338d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 q0, q0, q2 \n" // + sbg 2339d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 d2, d2, d6 \n" // + sr 23408f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmov.u8 d3, #255 \n" // a = 255 23418f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. 
23428f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bge 8b \n" 23438f506332af217882648eed166a257557855b9fdbfbarchard@google.com 23448f506332af217882648eed166a257557855b9fdbfbarchard@google.com "89: \n" 23458f506332af217882648eed166a257557855b9fdbfbarchard@google.com "adds %3, #8-1 \n" 23468f506332af217882648eed166a257557855b9fdbfbarchard@google.com "blt 99f \n" 23478f506332af217882648eed166a257557855b9fdbfbarchard@google.com 23488f506332af217882648eed166a257557855b9fdbfbarchard@google.com // Blend 1 pixels. 23498f506332af217882648eed166a257557855b9fdbfbarchard@google.com "1: \n" 23508f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. 23518f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. 23528f506332af217882648eed166a257557855b9fdbfbarchard@google.com "subs %3, %3, #1 \n" // 1 processed per loop. 2353d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q10, d4, d3 \n" // db * a 2354d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q11, d5, d3 \n" // dg * a 2355d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vmull.u8 q12, d6, d3 \n" // dr * a 2356d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 2357d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 2358d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 2359d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 2360d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 2361d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com "vqadd.u8 q0, q0, q2 \n" // + sbg 2362d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com 
"vqadd.u8 d2, d2, d6 \n" // + sr 23638f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vmov.u8 d3, #255 \n" // a = 255 23648f506332af217882648eed166a257557855b9fdbfbarchard@google.com "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. 23658f506332af217882648eed166a257557855b9fdbfbarchard@google.com "bge 1b \n" 23668f506332af217882648eed166a257557855b9fdbfbarchard@google.com 23678f506332af217882648eed166a257557855b9fdbfbarchard@google.com "99: \n" 23688f506332af217882648eed166a257557855b9fdbfbarchard@google.com 23698f506332af217882648eed166a257557855b9fdbfbarchard@google.com : "+r"(src_argb0), // %0 23708f506332af217882648eed166a257557855b9fdbfbarchard@google.com "+r"(src_argb1), // %1 23718f506332af217882648eed166a257557855b9fdbfbarchard@google.com "+r"(dst_argb), // %2 23728f506332af217882648eed166a257557855b9fdbfbarchard@google.com "+r"(width) // %3 23738f506332af217882648eed166a257557855b9fdbfbarchard@google.com : 2374d6b94fde24d1ccc6c1f8ee4a3a45caa4fcb66a6cfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12" 23758f506332af217882648eed166a257557855b9fdbfbarchard@google.com ); 23768f506332af217882648eed166a257557855b9fdbfbarchard@google.com} 23778f506332af217882648eed166a257557855b9fdbfbarchard@google.com 23781d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com// Attenuate 8 pixels at a time. 23791d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.comvoid ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 23801d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com asm volatile ( 23811d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com // Attenuate 8 pixels. 23821d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "1: \n" 23831d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. 
23841d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 23851d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vmull.u8 q10, d0, d3 \n" // b * a 23861d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vmull.u8 q11, d1, d3 \n" // g * a 23871d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vmull.u8 q12, d2, d3 \n" // r * a 23881d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 23891d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 23901d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 23911d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 23921d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "bgt 1b \n" 23931d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com : "+r"(src_argb), // %0 23941d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "+r"(dst_argb), // %1 23951d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com "+r"(width) // %2 23961d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com : 23971d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com : "cc", "memory", "q0", "q1", "q10", "q11", "q12" 23981d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com ); 23991d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com} 24001d160cb99f2b05df80c4555bd769825ad1175dc9fbarchard@google.com 2401ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// Quantize 8 ARGB pixels (32 bytes). 
2402ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com// dst = (dst * scale >> 16) * interval_size + interval_offset; 2403ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.comvoid ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, 2404ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com int interval_offset, int width) { 2405ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com asm volatile ( 2406ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vdup.u16 q8, %2 \n" 2407ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vshr.u16 q8, q8, #1 \n" // scale >>= 1 2408ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vdup.u16 q9, %3 \n" // interval multiply. 2409ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vdup.u16 q10, %4 \n" // interval add 2410ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com 2411ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com // 8 pixel loop. 2412ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com ".p2align 2 \n" 2413ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "1: \n" 2414ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. 2415ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "subs %1, %1, #8 \n" // 8 processed per loop. 2416ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmovl.u8 q0, d0 \n" // b (0 .. 
255) 2417ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmovl.u8 q1, d2 \n" 2418ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmovl.u8 q2, d4 \n" 2419ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqdmulh.s16 q0, q0, q8 \n" // b * scale 2420ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqdmulh.s16 q1, q1, q8 \n" // g 2421ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqdmulh.s16 q2, q2, q8 \n" // r 2422ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmul.u16 q0, q0, q9 \n" // b * interval_size 2423ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmul.u16 q1, q1, q9 \n" // g 2424ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vmul.u16 q2, q2, q9 \n" // r 2425ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vadd.u16 q0, q0, q10 \n" // b + interval_offset 2426ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vadd.u16 q1, q1, q10 \n" // g 2427ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vadd.u16 q2, q2, q10 \n" // r 2428ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqmovn.u16 d0, q0 \n" 2429ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqmovn.u16 d2, q1 \n" 2430ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vqmovn.u16 d4, q2 \n" 2431ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB. 
2432ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "bgt 1b \n" 2433ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com : "+r"(dst_argb), // %0 2434ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "+r"(width) // %1 2435ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com : "r"(scale), // %2 2436ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "r"(interval_size), // %3 2437ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com "r"(interval_offset) // %4 2438b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10" 2439b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com ); 2440b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com} 2441b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 2442b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// Shade 8 pixels at a time by specified value. 2443b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. 2444fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. 2445b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.comvoid ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, 2446b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com uint32 value) { 2447b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com asm volatile ( 2448b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vdup.u32 q0, %3 \n" // duplicate scale value. 2449fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vzip.u8 d0, d1 \n" // d0 aarrggbb. 2450fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vshr.u16 q0, q0, #1 \n" // scale / 2. 2451b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com 2452b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com // 8 pixel loop. 
2453b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com ".p2align 2 \n" 2454b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "1: \n" 2455b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB. 2456b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 2457b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q10, d20 \n" // b (0 .. 255) 2458b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q11, d22 \n" 2459b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q12, d24 \n" 2460b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vmovl.u8 q13, d26 \n" 2461578c88a9f7114b3ede887b3c6d9a11d8d06b043bfbarchard@google.com "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2 2462fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vqrdmulh.s16 q11, q11, d0[1] \n" // g 2463fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vqrdmulh.s16 q12, q12, d0[2] \n" // r 2464fa5d5fb491b7b0a654871295bddbbe4e0e69542cfbarchard@google.com "vqrdmulh.s16 q13, q13, d0[3] \n" // a 2465b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d20, q10 \n" 2466b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d22, q11 \n" 2467b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d24, q12 \n" 2468b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vqmovn.u16 d26, q13 \n" 2469b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB. 
2470b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "bgt 1b \n" 2471b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "+r"(src_argb), // %0 2472b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "+r"(dst_argb), // %1 2473b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com "+r"(width) // %2 2474b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "r"(value) // %3 2475b94b139e86635d40ed0d054bb66e30e6086ae7a3fbarchard@google.com : "cc", "memory", "q0", "q10", "q11", "q12", "q13" 2476ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com ); 2477ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com} 2478ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9fbarchard@google.com 247982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels 2480050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// Similar to ARGBToYJ but stores ARGB. 2481050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; 248282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.comvoid ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { 248382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com asm volatile ( 2484050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient 2485050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient 2486050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient 2487c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 248882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "1: \n" 248982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 
249082375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 249182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmull.u8 q2, d0, d24 \n" // B 249282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmlal.u8 q2, d1, d25 \n" // G 249382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmlal.u8 q2, d2, d26 \n" // R 2494050b39a5cbf6c0f529531aafba36f2c846a139b1fbarchard@google.com "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B 249582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmov d1, d0 \n" // G 249682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vmov d2, d0 \n" // R 249782375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels. 249882375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "bgt 1b \n" 249982375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com : "+r"(src_argb), // %0 250082375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "+r"(dst_argb), // %1 250182375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com "+r"(width) // %2 250282375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com : 250382375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q12", "q13" 250482375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com ); 250582375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com} 250682375d6de22e6fdc09f9b74dfe036ec172c3af73fbarchard@google.com 2507c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
2508c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// b = (r * 35 + g * 68 + b * 17) >> 7 2509c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// g = (r * 45 + g * 88 + b * 22) >> 7 2510c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com// r = (r * 50 + g * 98 + b * 24) >> 7 2511c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.comvoid ARGBSepiaRow_NEON(uint8* dst_argb, int width) { 2512c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com asm volatile ( 2513c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d20, #17 \n" // BB coefficient 2514c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d21, #68 \n" // BG coefficient 2515c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d22, #35 \n" // BR coefficient 2516c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d24, #22 \n" // GB coefficient 2517c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d25, #88 \n" // GG coefficient 2518c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d26, #45 \n" // GR coefficient 2519c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d28, #24 \n" // BB coefficient 2520c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d29, #98 \n" // BG coefficient 2521c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmov.u8 d30, #50 \n" // BR coefficient 2522c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2523c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "1: \n" 2524c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels. 2525c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "subs %1, %1, #8 \n" // 8 processed per loop. 
2526c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmull.u8 q2, d0, d20 \n" // B to Sepia B 2527c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q2, d1, d21 \n" // G 2528c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q2, d2, d22 \n" // R 2529c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmull.u8 q3, d0, d24 \n" // B to Sepia G 2530c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q3, d1, d25 \n" // G 2531c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q3, d2, d26 \n" // R 2532c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmull.u8 q8, d0, d28 \n" // B to Sepia R 2533c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q8, d1, d29 \n" // G 2534c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vmlal.u8 q8, d2, d30 \n" // R 253587adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B 253687adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G 253787adfaa61ea7704874ad3494a3bce3e7364b146afbarchard@google.com "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R 2538c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. 
2539c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "bgt 1b \n" 2540c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com : "+r"(dst_argb), // %0 2541c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "+r"(width) // %1 2542c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com : 2543c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", 2544c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com "q10", "q11", "q12", "q13", "q14", "q15" 2545c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com ); 2546c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com} 2547c247625d7fabfa6217111c0b6a2f0f30b99da204fbarchard@google.com 254862154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com// Tranform 8 ARGB pixels (32 bytes) with color matrix. 2549c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// TODO(fbarchard): Was same as Sepia except matrix is provided. This function 2550c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com// needs to saturate. Consider doing a non-saturating version. 2551c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.comvoid ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, 2552c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com const int8* matrix_argb, int width) { 255362154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com asm volatile ( 2554c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. 255562154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com "vmovl.s8 q0, d4 \n" // B,G coefficients s16. 2556c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmovl.s8 q1, d5 \n" // R,A coefficients s16. 
255762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com 2558c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 255962154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com "1: \n" 2560c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. 2561c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "subs %2, %2, #8 \n" // 8 processed per loop. 25620cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit 25630cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q9, d18 \n" // g 25640cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q10, d20 \n" // r 25650cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmovl.u8 q15, d22 \n" // a 25660cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B 25670cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G 25680cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R 2569c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A 25700cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B 25710cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G 25720cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R 2573c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A 25740cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q12, q12, q4 \n" // Accumulate B 25750cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q13, q13, q5 \n" // Accumulate G 
25760cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q14, q14, q6 \n" // Accumulate R 2577c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqadd.s16 q15, q15, q7 \n" // Accumulate A 25780cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B 25790cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G 25800cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R 2581c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A 25820cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q12, q12, q4 \n" // Accumulate B 25830cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q13, q13, q5 \n" // Accumulate G 25840cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q14, q14, q6 \n" // Accumulate R 2585c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqadd.s16 q15, q15, q7 \n" // Accumulate A 25860cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B 25870cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G 25880cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R 2589c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vmul.s16 q7, q15, d3[3] \n" // A += A * Matrix A 25900cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q12, q12, q4 \n" // Accumulate B 25910cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q13, q13, q5 \n" // Accumulate G 25920cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "vqadd.s16 q14, q14, q6 \n" // Accumulate R 2593c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqadd.s16 q15, q15, q7 \n" // 
Accumulate A 2594c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B 2595c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G 2596c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R 2597c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A 2598c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. 2599c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "bgt 1b \n" 2600c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com : "+r"(src_argb), // %0 2601c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "+r"(dst_argb), // %1 2602c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com "+r"(width) // %2 2603c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com : "r"(matrix_argb) // %3 2604c99db063e24d6180740d4adc29e84159096eef2dfbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", 26050cc0b4df467bc318d085c472fadd34e8bf994165fbarchard@google.com "q10", "q11", "q12", "q13", "q14", "q15" 260662154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com ); 260762154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com} 260862154e53a2820136f6a8cab8cc029622e3b100c1fbarchard@google.com 2609512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. 2610512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com#ifdef HAS_ARGBMULTIPLYROW_NEON 26115b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. 
26125b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.comvoid ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 26135b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com uint8* dst_argb, int width) { 26145b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com asm volatile ( 26155b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com // 8 pixel loop. 2616c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 26175b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "1: \n" 26185b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 26195b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels. 26205b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 26215b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vmull.u8 q0, d0, d1 \n" // multiply B 26225b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vmull.u8 q1, d2, d3 \n" // multiply G 26235b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vmull.u8 q2, d4, d5 \n" // multiply R 26245b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vmull.u8 q3, d6, d7 \n" // multiply A 26256a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B 26266a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G 26276a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R 26286a352141ef2167c204a450cea179c65e4e34cb65fbarchard@google.com "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A 26295b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 
26305b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "bgt 1b \n" 26315b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com 26325b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com : "+r"(src_argb0), // %0 26335b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "+r"(src_argb1), // %1 26345b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "+r"(dst_argb), // %2 26355b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "+r"(width) // %3 26365b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com : 26375b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" 26385b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com ); 26395b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com} 2640512bec91edaea60129d08c2d8053653b9fe51db4fbarchard@google.com#endif // HAS_ARGBMULTIPLYROW_NEON 26415b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com 26425b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com// Add 2 rows of ARGB pixels together, 8 pixels at a time. 26435b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.comvoid ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 26445b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com uint8* dst_argb, int width) { 26455b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com asm volatile ( 26465b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com // 8 pixel loop. 2647c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 26485b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "1: \n" 26495b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 26505b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels. 26515b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 
26525b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vqadd.u8 q0, q0, q2 \n" // add B, G 26535b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vqadd.u8 q1, q1, q3 \n" // add R, A 26545b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 26555b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "bgt 1b \n" 26565b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com 26575b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com : "+r"(src_argb0), // %0 26585b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "+r"(src_argb1), // %1 26595b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "+r"(dst_argb), // %2 26605b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com "+r"(width) // %3 26615b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com : 2662573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" 2663573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com ); 2664573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com} 2665573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com 2666573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com// Subtract 2 rows of ARGB pixels, 8 pixels at a time. 2667573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.comvoid ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 2668573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com uint8* dst_argb, int width) { 2669573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com asm volatile ( 2670573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com // 8 pixel loop. 2671c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2672573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "1: \n" 2673573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. 
2674573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels. 2675573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 2676573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "vqsub.u8 q0, q0, q2 \n" // subtract B, G 2677573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "vqsub.u8 q1, q1, q3 \n" // subtract R, A 2678573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 2679573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "bgt 1b \n" 2680573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com 2681573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com : "+r"(src_argb0), // %0 2682573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "+r"(src_argb1), // %1 2683573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "+r"(dst_argb), // %2 2684573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com "+r"(width) // %3 2685573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com : 2686573a883dd65c94a10422e6e9e0d453e2a5d45227fbarchard@google.com : "cc", "memory", "q0", "q1", "q2", "q3" 26875b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com ); 26885b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com} 26895b0f7e1132bbf79dc3d70dcf225646fcbc5875e2fbarchard@google.com 2690c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into ARGB. 
2691c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// A = 255 2692c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// R = Sobel 2693c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// G = Sobel 2694c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// B = Sobel 2695c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 2696c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com uint8* dst_argb, int width) { 2697c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com asm volatile ( 2698c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vmov.u8 d3, #255 \n" // alpha 2699c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com // 8 pixel loop. 2700c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2701c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "1: \n" 2702c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vld1.8 {d0}, [%0]! \n" // load 8 sobelx. 2703c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vld1.8 {d1}, [%1]! \n" // load 8 sobely. 2704c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 2705c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vqadd.u8 d0, d0, d1 \n" // add 2706c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vmov.u8 d1, d0 \n" 2707c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vmov.u8 d2, d0 \n" 2708c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 
2709c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "bgt 1b \n" 2710c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "+r"(src_sobelx), // %0 2711c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(src_sobely), // %1 27128be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "+r"(dst_argb), // %2 27138be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "+r"(width) // %3 27148be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com : 27158be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com : "cc", "memory", "q0", "q1" 27168be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com ); 27178be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com} 27188be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com 27198be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com// Adds Sobel X and Sobel Y and stores Sobel into plane. 27208be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.comvoid SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 27218be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com uint8* dst_y, int width) { 27228be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com asm volatile ( 27238be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com // 16 pixel loop. 2724c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 27258be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "1: \n" 27268be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "vld1.8 {q0}, [%0]! \n" // load 16 sobelx. 27278be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "vld1.8 {q1}, [%1]! \n" // load 16 sobely. 27288be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "subs %3, %3, #16 \n" // 16 processed per loop. 27298be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "vqadd.u8 q0, q0, q1 \n" // add 27308be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "vst1.8 {q0}, [%2]! \n" // store 16 pixels. 
27318be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "bgt 1b \n" 27328be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com : "+r"(src_sobelx), // %0 27338be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "+r"(src_sobely), // %1 27348be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "+r"(dst_y), // %2 27358be4b289c799356d84c68c4eb4b5403285096693fbarchard@google.com "+r"(width) // %3 2736c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : 2737c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "cc", "memory", "q0", "q1" 2738c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com ); 2739c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com} 2740c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com 2741c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// Mixes Sobel X, Sobel Y and Sobel into ARGB. 2742c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// A = 255 2743c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// R = Sobel X 2744c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// G = Sobel 2745c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// B = Sobel Y 2746c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, 2747c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com uint8* dst_argb, int width) { 2748c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com asm volatile ( 2749c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vmov.u8 d3, #255 \n" // alpha 2750c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com // 8 pixel loop. 2751c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2752c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "1: \n" 2753c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vld1.8 {d2}, [%0]! \n" // load 8 sobelx. 
2754c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vld1.8 {d0}, [%1]! \n" // load 8 sobely. 2755c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "subs %3, %3, #8 \n" // 8 processed per loop. 2756c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vqadd.u8 d1, d0, d2 \n" // add 2757c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 2758c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "bgt 1b \n" 2759c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "+r"(src_sobelx), // %0 2760c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(src_sobely), // %1 2761ff4a84168d731c33ce7dedcfb497376a8669cecafbarchard@google.com "+r"(dst_argb), // %2 2762ff4a84168d731c33ce7dedcfb497376a8669cecafbarchard@google.com "+r"(width) // %3 2763c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : 2764c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "cc", "memory", "q0", "q1" 2765c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com ); 2766c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com} 2767c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com 2768c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// SobelX as a matrix is 2769c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1 0 1 2770c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -2 0 2 2771c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1 0 1 2772c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, 2773c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com const uint8* src_y2, uint8* dst_sobelx, int width) { 2774c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com asm volatile ( 2775c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 
2776c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "1: \n" 27772c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0],%5 \n" // top 27782c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d1}, [%0],%6 \n" 2779c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vsubl.u8 q0, d0, d1 \n" 27802c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%1],%5 \n" // center * 2 27812c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d3}, [%1],%6 \n" 2782c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vsubl.u8 q1, d2, d3 \n" 2783c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vadd.s16 q0, q0, q1 \n" 2784c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vadd.s16 q0, q0, q1 \n" 27852c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%2],%5 \n" // bottom 27862c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d3}, [%2],%6 \n" 2787c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "subs %4, %4, #8 \n" // 8 pixels 2788c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vsubl.u8 q1, d2, d3 \n" 2789c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vadd.s16 q0, q0, q1 \n" 2790c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vabs.s16 q0, q0 \n" 2791c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vqmovn.u16 d0, q0 \n" 27922c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0}, [%3]! 
\n" // store 8 sobelx 2793c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "bgt 1b \n" 2794c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "+r"(src_y0), // %0 2795c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(src_y1), // %1 2796c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(src_y2), // %2 2797c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(dst_sobelx), // %3 2798c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(width) // %4 2799c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "r"(2), // %5 2800c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "r"(6) // %6 2801c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 2802c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com ); 2803c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com} 2804c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com 2805c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// SobelY as a matrix is 2806c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// -1 -2 -1 2807c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// 0 0 0 2808c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com// 1 2 1 2809c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.comvoid SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, 2810c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com uint8* dst_sobely, int width) { 2811c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com asm volatile ( 2812c2295807bdcfcc45d932f228f0ed3f7124005de6fbarchard@google.com ".p2align 2 \n" 2813c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "1: \n" 28142c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d0}, [%0],%4 \n" // left 28152c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d1}, [%1],%4 \n" 
2816c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vsubl.u8 q0, d0, d1 \n" 28172c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%0],%4 \n" // center * 2 28182c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d3}, [%1],%4 \n" 2819c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vsubl.u8 q1, d2, d3 \n" 2820c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vadd.s16 q0, q0, q1 \n" 2821c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vadd.s16 q0, q0, q1 \n" 28222c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d2}, [%0],%5 \n" // right 28232c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vld1.8 {d3}, [%1],%5 \n" 2824c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "subs %3, %3, #8 \n" // 8 pixels 2825c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vsubl.u8 q1, d2, d3 \n" 2826c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vadd.s16 q0, q0, q1 \n" 2827c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vabs.s16 q0, q0 \n" 2828c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "vqmovn.u16 d0, q0 \n" 28292c4e3993c073a99ed42fa0a91cdb926e715d57f4fbarchard@google.com "vst1.8 {d0}, [%2]! 
\n" // store 8 sobely 2830c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "bgt 1b \n" 2831c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "+r"(src_y0), // %0 2832c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(src_y1), // %1 2833c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(dst_sobely), // %2 2834c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "+r"(width) // %3 2835c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "r"(1), // %4 2836c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com "r"(6) // %5 2837c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com : "cc", "memory", "q0", "q1" // Clobber List 2838c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com ); 2839c56a55fc7206a257eecc21969f94ab066dd80f2ffbarchard@google.com} 284019932f8dbc5ca3123d87b5b8369e7d7bf3469a97fbarchard@google.com#endif // __ARM_NEON__ 28412d11d43a6e21865b904705acce6535ae4c2d3caffbarchard@google.com 2842fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#ifdef __cplusplus 2843fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com} // extern "C" 2844fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com} // namespace libyuv 2845fe5ff7ed5451496281697bda9cb85084c532926cfbarchard@google.com#endif 2846