/*
 * ARMv8 NEON optimizations for libjpeg-turbo
 *
 * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
 * All rights reserved.
 * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
 * Copyright (C) 2013-2014, Linaro Limited
 * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

/* Only emitted when both __linux__ and __ELF__ are defined. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
#endif

/* Everything below is assembled into the code section for ARMv8-A with the
 * floating-point and Advanced SIMD extensions enabled. */
.text
.arch armv8-a+fp+simd


/* NOTE(review): no use of this switch is visible in this part of the file;
 * confirm which routines test it before changing the value. */
#define RESPECT_STRICT_ALIGNMENT 1


/*****************************************************************************/
/*
 * asm_function fname
 *
 * Supplementary macro for setting function attributes: emits the symbol
 * directives for `fname` and then defines its entry label.
 *
 *   __APPLE__ defined : the symbol is exported as `_fname` (leading
 *                       underscore) and that label is defined.
 *   otherwise         : `fname` is exported; when __ELF__ is defined it is
 *                       additionally marked .hidden and typed %function;
 *                       then the label `fname` is defined.
 *
 * The label is the last thing the macro emits, so the function body has to
 * follow the invocation directly.
 */
.macro asm_function fname
#ifdef __APPLE__
    .globl _\fname
_\fname:
#else
    .global \fname
#ifdef __ELF__
    .hidden \fname
    .type \fname, %function
#endif
\fname:
#endif
.endm

/*
 * transpose_single x0, x1, xi, xilen, literal
 *
 * Transpose elements of single 128 bit registers.
 *
 *   x0      - register holding the elements to transpose; receives the
 *             trn1 result
 *   x1      - second working register; receives the trn2 result
 *   xi      - scratch register (lane 0 is overwritten)
 *   xilen   - element suffix used for the indexed lane copies
 *   literal - vector arrangement used by trn1/trn2
 *
 * Only lane 0 of xi and of x1 is written by the two `ins` instructions; the
 * remaining lanes of both registers enter trn1/trn2 with whatever they held
 * on entry.
 */
.macro transpose_single x0,x1,xi,xilen,literal
    ins     \xi\xilen[0], \x0\xilen[0]          /* xi[0] = x0[0]           */
    ins     \x1\xilen[0], \x0\xilen[1]          /* x1[0] = x0[1]           */
    trn1    \x0\literal, \x0\literal, \x1\literal
    trn2    \x1\literal, \xi\literal, \x1\literal
.endm
/*
 * transpose x0, x1, xi, xilen, literal
 *
 * Transpose elements of 2 different registers.
 *
 *   x0, x1  - the register pair; x0 receives trn1(x0, x1) and x1 receives
 *             trn2(old x0, x1)
 *   xi      - scratch register that keeps a copy of x0 for the trn2 step
 *   xilen   - arrangement used for that register copy
 *   literal - arrangement used by trn1/trn2
 */
.macro transpose x0, x1, xi, xilen, literal
    mov     \xi\xilen, \x0\xilen                /* x0 is overwritten next  */
    trn1    \x0\literal, \x0\literal, \x1\literal
    trn2    \x1\literal, \xi\literal, \x1\literal
.endm

/*
 * Transpose a block of 4x4 coefficients in four 64-bit registers.
 *
 * transpose_4x4_32 and transpose_4x4_16 are the two stages used by
 * transpose_4x4 below.  Both take four (register, arrangement) pairs followed
 * by a scratch register and the arrangement used to copy into it:
 *
 *   x0,x0len  x1,x1len  x2,x2len  x3,x3len  xi,xilen
 *
 * transpose_4x4_32 works on the pairs (x0, x2) and (x1, x3).
 * transpose_4x4_16 works on the pairs (x0, x1) and (x2, x3).
 *
 * Every operand of one trn1/trn2 instruction must use the same arrangement,
 * so callers are expected to pass matching suffixes for the registers of a
 * pair (transpose_4x4 passes the same suffix for all four).
 */
.macro transpose_4x4_32 x0, x0len, x1, x1len, x2, x2len, x3, x3len, xi, xilen
    /* pair (x0, x2) */
    mov     \xi\xilen, \x0\xilen
    trn1    \x0\x0len, \x0\x0len, \x2\x2len
    trn2    \x2\x2len, \xi\x0len, \x2\x2len     /* xi holds the old x0     */
    /* pair (x1, x3) */
    mov     \xi\xilen, \x1\xilen
    trn1    \x1\x1len, \x1\x1len, \x3\x3len
    trn2    \x3\x3len, \xi\x1len, \x3\x3len     /* xi holds the old x1     */
.endm

.macro transpose_4x4_16 x0, x0len, x1, x1len, x2, x2len, x3, x3len, xi, xilen
    /* pair (x0, x1) */
    mov     \xi\xilen, \x0\xilen
    trn1    \x0\x0len, \x0\x0len, \x1\x1len
    /* Each register is paired with its own arrangement parameter and xi with
     * the arrangement of the register it currently mirrors. */
    trn2    \x1\x1len, \xi\x0len, \x1\x1len     /* xi holds the old x0     */
    /* pair (x2, x3) */
    mov     \xi\xilen, \x2\xilen
    trn1    \x2\x2len, \x2\x2len, \x3\x3len
    trn2    \x3\x3len, \xi\x2len, \x3\x3len     /* xi holds the old x2     */
.endm

/*
 * transpose_4x4 x0, x1, x2, x3, x5
 *
 * Runs the 16-bit stage on (x0, x1) / (x2, x3) with the .4h arrangement and
 * then the 32-bit stage on (x0, x2) / (x1, x3) with the .2s arrangement.
 * x5 is the scratch register; it is copied into as a full .16b vector.
 */
.macro transpose_4x4 x0, x1, x2, x3, x5
    transpose_4x4_16 \x0,.4h, \x1,.4h, \x2,.4h,\x3,.4h,\x5,.16b
    transpose_4x4_32 \x0,.2s, \x1,.2s, \x2,.2s,\x3,.2s,\x5,.16b
.endm


#define CENTERJSAMPLE 128

/*****************************************************************************/

/*
 * Perform dequantization and inverse DCT on one block of coefficients.
 *
 * GLOBAL(void)
 * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block,
 *                        JSAMPARRAY output_buf, JDIMENSION output_col)
 */

/*
 * Fixed-point multipliers.  FIX_a_bcdefghij is the decimal constant
 * a.bcdefghij multiplied by 2^13 (8192) and rounded to the nearest integer,
 * e.g. 0.298631336 * 8192 = 2446.39 -> 2446.
 */
#define FIX_0_298631336  (2446)
#define FIX_0_390180644  (3196)
#define FIX_0_541196100  (4433)
#define FIX_0_765366865  (6270)
#define FIX_0_899976223  (7373)
#define FIX_1_175875602  (9633)
#define FIX_1_501321110  (12299)
#define FIX_1_847759065  (15137)
#define FIX_1_961570560  (16069)
#define FIX_2_053119869  (16819)
#define FIX_2_562915447  (20995)
#define FIX_3_072711026  (25172)

/* Pre-combined sums and differences of the multipliers above. */
#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560)
#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644)
#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065)
#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447)
#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223)
#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223)
#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447)
#define FIX_0_541196100_PLUS_0_765366865  (FIX_0_541196100 + FIX_0_765366865)

/*
 * Reference SIMD-friendly 1-D ISLOW iDCT C implementation.
 * Uses some ideas from the comments in 'simd/jiss2int-64.asm'
 *
 * The macro takes the eight inputs of one 1-D pass (xrow0..xrow7) and stores
 * its results into tmp0..tmp3 and tmp10..tmp13.  Those eight result names are
 * not declared by the macro, and MULTIPLY, DCTELEM and INT32 are not defined
 * in this part of the file, so all of them must be provided by whatever C
 * code expands it.
 * NOTE(review): no expansion of this macro is visible here; it appears to be
 * kept as a readable description of the arithmetic - confirm before removing.
 */
#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7)   \
{                                                                             \
    DCTELEM row0, row1, row2, row3, row4, row5, row6, row7;                   \
    INT32   q1, q2, q3, q4, q5, q6, q7;                                       \
    INT32   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
                                                                              \
    /* 1-D iDCT input data */                                                 \
    row0 = xrow0;                                                             \
    row1 = xrow1;                                                             \
    row2 = xrow2;                                                             \
    row3 = xrow3;                                                             \
    row4 = xrow4;                                                             \
    row5 = xrow5;                                                             \
    row6 = xrow6;                                                             \
    row7 = xrow7;                                                             \
                                                                              \
    q5 = row7 + row3;                                                         \
    q4 = row5 + row1;                                                         \
    q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) +                    \
         MULTIPLY(q4, FIX_1_175875602);                                       \
    q7 = MULTIPLY(q5, FIX_1_175875602) +                                      \
         MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644);                     \
    q2 = MULTIPLY(row2, FIX_0_541196100) +                                    \
         MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065);                   \
    q4 = q6;                                                                  \
    q3 = ((INT32) row0 - (INT32) row4) << 13;                                 \
    q6 += MULTIPLY(row5, -FIX_2_562915447) +                                  \
          MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447);                  \
    /* now we can use q1 (reloadable constants have been used up) */          \
    q1 = q3 + q2;                                                             \
    q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) +                 \
          MULTIPLY(row1, -FIX_0_899976223);                                   \
    q5 = q7;                                                                  \
    q1 = q1 + q6;                                                             \
    q7 += MULTIPLY(row7, -FIX_0_899976223) +                                  \
          MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223);                  \
                                                                              \
    /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */        \
    tmp11_plus_tmp2 = q1;                                                     \
    row1 = 0;                                                                 \
                                                                              \
    q1 = q1 - q6;                                                             \
    q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) +                 \
          MULTIPLY(row3, -FIX_2_562915447);                                   \
    q1 = q1 - q6;                                                             \
    q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) +                   \
         MULTIPLY(row6, FIX_0_541196100);                                     \
    q3 = q3 - q2;                                                             \
                                                                              \
    /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */        \
    tmp11_minus_tmp2 = q1;                                                    \
                                                                              \
    q1 = ((INT32) row0 + (INT32) row4) << 13;                                 \
    q2 = q1 + q6;                                                             \
    q1 = q1 - q6;                                                             \
                                                                              \
    /* pick up the results */                                                 \
    tmp0  = q4;                                                               \
    tmp1  = q5;                                                               \
    tmp2  = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2;                         \
    tmp3  = q7;                                                               \
    tmp10 = q2;                                                               \
    tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2;                         \
    tmp12 = q3;                                                               \
    tmp13 = q1;                                                               \
}

/*
 * Lane aliases for the multipliers: twelve consecutive 16-bit lanes, four
 * each in v0, v1 and v2, in the order listed.  The element form `vN.h[i]` is
 * the architectural spelling of an indexed halfword lane and names the same
 * lane as the GNU-only `vN.4h[i]` form.
 */
#define XFIX_0_899976223                    v0.h[0]
#define XFIX_0_541196100                    v0.h[1]
#define XFIX_2_562915447                    v0.h[2]
#define XFIX_0_298631336_MINUS_0_899976223  v0.h[3]
#define XFIX_1_501321110_MINUS_0_899976223  v1.h[0]
#define XFIX_2_053119869_MINUS_2_562915447  v1.h[1]
#define XFIX_0_541196100_PLUS_0_765366865   v1.h[2]
#define XFIX_1_175875602                    v1.h[3]
#define XFIX_1_175875602_MINUS_0_390180644  v2.h[0]
#define XFIX_0_541196100_MINUS_1_847759065  v2.h[1]
#define XFIX_3_072711026_MINUS_2_562915447  v2.h[2]
#define XFIX_1_175875602_MINUS_1_961570560  v2.h[3]

/* Align the data that follows to a 16-byte boundary. */
.balign 16
2142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.orgjsimd_idct_islow_neon_consts: 2152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_0_899976223 /* d0[0] */ 2162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_0_541196100 /* d0[1] */ 2172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_2_562915447 /* d0[2] */ 2182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */ 2192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */ 2202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */ 2212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */ 2222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_1_175875602 /* d1[3] */ 2232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* reloadable constants */ 2242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */ 2252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */ 2262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */ 2272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */ 2282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 2292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.orgasm_function jsimd_idct_islow_neon 2302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 2312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org DCT_TABLE .req x0 2322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org COEF_BLOCK .req x1 2332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 
OUTPUT_BUF .req x2 2342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org OUTPUT_COL .req x3 2352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP1 .req x0 2362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP2 .req x1 2372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP3 .req x2 2382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP4 .req x15 2392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 2402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW0L .req v16 2412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW0R .req v17 2422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW1L .req v18 2432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW1R .req v19 2442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW2L .req v20 2452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW2R .req v21 2462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW3L .req v22 2472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW3R .req v23 2482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW4L .req v24 2492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW4R .req v25 2502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW5L .req v26 2512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW5R .req v27 2522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW6L .req v28 2532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW6R .req v29 2542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW7L .req v30 2552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ROW7R .req v31 2562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */ 2572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub sp, sp, 272 
2582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org str x15, [sp], 16 2592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org adr x15, jsimd_idct_islow_neon_consts 2602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v0.8b - v3.8b}, [sp], 32 2612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v4.8b - v7.8b}, [sp], 32 2622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v8.8b - v11.8b}, [sp], 32 2632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v12.8b - v15.8b}, [sp], 32 2642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v16.8b - v19.8b}, [sp], 32 2652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v20.8b - v23.8b}, [sp], 32 2662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v24.8b - v27.8b}, [sp], 32 2672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v28.8b - v31.8b}, [sp], 32 2682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32 2692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32 2702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32 2712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v16.4h, v16.4h, v0.4h 2722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v17.4h, v17.4h, v1.4h 2732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v16.2d[1], v17.2d[0] /* 128 bit q8 */ 2742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32 2752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v18.4h, v18.4h, v2.4h 2762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v19.4h, v19.4h, v3.4h 2772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v18.2d[1], v19.2d[0] /* 128 bit 
q9 */ 2782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32 2792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v20.4h, v20.4h, v4.4h 2802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v21.4h, v21.4h, v5.4h 2812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v20.2d[1], v21.2d[0] /* 128 bit q10 */ 2822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32 2832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v22.4h, v22.4h, v6.4h 2842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v23.4h, v23.4h, v7.4h 2852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v22.2d[1], v23.2d[0] /* 128 bit q11 */ 2862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK] 2872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v24.4h, v24.4h, v0.4h 2882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v25.4h, v25.4h, v1.4h 2892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v24.2d[1], v25.2d[0] /* 128 bit q12 */ 2902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32 2912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v28.4h, v28.4h, v4.4h 2922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v29.4h, v29.4h, v5.4h 2932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v28.2d[1], v29.2d[0] /* 128 bit q14 */ 2942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v26.4h, v26.4h, v2.4h 2952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v27.4h, v27.4h, v3.4h 2962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v26.2d[1], v27.2d[0] /* 128 bit q13 */ 2972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.4h, 
v1.4h, v2.4h, v3.4h}, [x15] /* load constants */ 2982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add x15, x15, #16 2992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v30.4h, v30.4h, v6.4h 3002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v31.4h, v31.4h, v7.4h 3012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v30.2d[1], v31.2d[0] /* 128 bit q15 */ 3022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Go to the bottom of the stack */ 3032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub sp, sp, 352 3042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org stp x4, x5, [sp], 16 3052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v8.4h - v11.4h}, [sp], 32 /* save NEON registers */ 3062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v12.4h - v15.4h}, [sp], 32 3072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 1-D IDCT, pass 1, left 4x8 half */ 3082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4h, ROW7L.4h, ROW3L.4h 3092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v5.4h, ROW5L.4h, ROW1L.4h 3102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, v4.4h, XFIX_1_175875602_MINUS_1_961570560 3112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, v5.4h, XFIX_1_175875602 3122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v14.4s, v4.4h, XFIX_1_175875602 3132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Check for the zero coefficients in the right 4x8 half */ 3142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, v5.4h, XFIX_1_175875602_MINUS_0_390180644 3152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ssubl v6.4s, ROW0L.4h, ROW4L.4h 3162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] 
3172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v4.4s, ROW2L.4h, XFIX_0_541196100 3182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v4.4s, ROW6L.4h, XFIX_0_541196100_MINUS_1_847759065 3192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x4, x5 3202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v8.16b, v12.16b 3212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v12.4s, ROW5L.4h, XFIX_2_562915447 3222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] 3232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 3242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v6.4s, v6.4s, #13 3252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x4 3262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 3272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0 , x5 3282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v4.4s 3292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] 3302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v10.16b, v14.16b 3312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v2.4s, v12.4s 3322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x4 3332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v14.4s, ROW7L.4h, XFIX_0_899976223 3342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x5 3352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 3362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW1L.4h, v2.4s, #11 
3372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] 3382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 3392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v10.4s, ROW5L.4h, XFIX_2_053119869_MINUS_2_562915447 3402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x4 3412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 3422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x5 3432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 3442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 3452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] 3462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW6L.4h, XFIX_0_541196100 3472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v4.4s 3482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x4 3492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW6L.4h, v2.4s, #11 3502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x5 3512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v10.4s 3522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] 3532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v10.4s 3542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org saddl v10.4s, ROW0L.4h, ROW4L.4h 3552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x4 3562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW2L.4h, v2.4s, #11 
3572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x5 3582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW5L.4h, v6.4s, #11 3592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] 3602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v10.4s, v10.4s, #13 3612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v8.4s, ROW7L.4h, XFIX_0_298631336_MINUS_0_899976223 3622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x4 3632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4s, v10.4s, v12.4s 3642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x0, x5 3652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org cmp x0, #0 /* orrs instruction removed */ 3662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v10.4s, v12.4s 3672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v4.4s, v14.4s 3682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] 3692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v4.4s, v4.4s, v14.4s 3702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4s, v2.4s, v8.4s 3712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org orr x0, x4, x5 3722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v2.4s, v8.4s 3732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* pop {x4, x5} */ 3742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub sp, sp, 80 3752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp x4, x5, [sp], 16 3762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW7L.4h, v4.4s, #11 3772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW3L.4h, v10.4s, #11 
3782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW0L.4h, v12.4s, #11 3792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW4L.4h, v6.4s, #11 3802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 3812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org beq 3f /* Go to do some special handling for the sparse right 4x8 half */ 3822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 3832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 1-D IDCT, pass 1, right 4x8 half */ 3842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.4h}, [x15] /* reload constants */ 3852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4h, ROW7R.4h, ROW3R.4h 3862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v8.4h, ROW5R.4h, ROW1R.4h 3872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW6L <-> ROW7L (v3 available free register) */ 3882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW6L, ROW7L, v3, .16b, .4h 3892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, v10.4h, XFIX_1_175875602_MINUS_1_961570560 3902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, v8.4h, XFIX_1_175875602 3912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW2L <-> ROW3L (v3 available free register) */ 3922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW2L, ROW3L, v3, .16b, .4h 3932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v14.4s, v10.4h, XFIX_1_175875602 3942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, v8.4h, XFIX_1_175875602_MINUS_0_390180644 3952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW0L <-> ROW1L (v3 available free register) */ 3962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW0L, ROW1L, v3, .16b, .4h 
3972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ssubl v6.4s, ROW0R.4h, ROW4R.4h 3982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v4.4s, ROW2R.4h, XFIX_0_541196100 3992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065 4002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW4L <-> ROW5L (v3 available free register) */ 4012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW4L, ROW5L, v3, .16b, .4h 4022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v8.16b, v12.16b 4032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v12.4s, ROW5R.4h, XFIX_2_562915447 4042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW3R.4h, XFIX_3_072711026_MINUS_2_562915447 4052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW1L <-> ROW3L (v3 available free register) */ 4062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW1L, ROW3L, v3, .16b, .2s 4072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v6.4s, v6.4s, #13 4082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v8.4s, ROW1R.4h, XFIX_0_899976223 4092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW4L <-> ROW6L (v3 available free register) */ 4102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW4L, ROW6L, v3, .16b, .2s 4112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v4.4s 4122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v10.16b, v14.16b 4132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v2.4s, v12.4s 4142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW0L <-> ROW2L (v3 available free register) */ 4152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW0L, ROW2L, 
v3, .16b, .2s 4162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v14.4s, ROW7R.4h, XFIX_0_899976223 4172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1R.4h, XFIX_1_501321110_MINUS_0_899976223 4182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW1R.4h, v2.4s, #11 4192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose ROW5L <-> ROW7L (v3 available free register) */ 4202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW5L, ROW7L, v3, .16b, .2s 4212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 4222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447 4232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v10.4s, ROW3R.4h, XFIX_2_562915447 4242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 4252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW2R.4h, XFIX_0_541196100_PLUS_0_765366865 4262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW6R.4h, XFIX_0_541196100 4272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v4.4s 4282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW6R.4h, v2.4s, #11 4292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v10.4s 4302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v10.4s 4312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org saddl v10.4s, ROW0R.4h, ROW4R.4h 4322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW2R.4h, v2.4s, #11 4332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW5R.4h, v6.4s, #11 4342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v10.4s, v10.4s, #13 
4352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223 4362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4s, v10.4s, v12.4s 4372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v10.4s, v12.4s 4382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v4.4s, v14.4s 4392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v4.4s, v4.4s, v14.4s 4402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4s, v2.4s, v8.4s 4412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v2.4s, v8.4s 4422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW7R.4h, v4.4s, #11 4432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW3R.4h, v10.4s, #11 4442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW0R.4h, v12.4s, #11 4452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn ROW4R.4h, v6.4s, #11 4462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose right 4x8 half */ 4472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW6R, ROW7R, v3, .16b, .4h 4482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW2R, ROW3R, v3, .16b, .4h 4492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW0R, ROW1R, v3, .16b, .4h 4502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW4R, ROW5R, v3, .16b, .4h 4512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW1R, ROW3R, v3, .16b, .2s 4522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW4R, ROW6R, v3, .16b, .2s 4532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW0R, ROW2R, v3, .16b, .2s 4542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW5R, ROW7R, v3, .16b, .2s 
4552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 4562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */ 4572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.4h}, [x15] /* reload constants */ 4582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4S, ROW1R.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */ 4592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW1L.4h, XFIX_1_175875602 4602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW3R.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */ 4612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560 4622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v14.4s, ROW3R.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */ 4632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW3L.4h, XFIX_1_175875602 4642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1R.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */ 4652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644 4662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ssubl v6.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */ 4672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v4.4s, ROW2L.4h, XFIX_0_541196100 4682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v4.4s, ROW2R.4h, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L.4h <-> ROW2R.4h */ 4692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v8.16b, v12.16b 4702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v12.4s, ROW1R.4h, XFIX_2_562915447 /* ROW5L.4h <-> ROW1R.4h */ 4712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal 
v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 4722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v6.4s, v6.4s, #13 4732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 4742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v4.4s 4752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v10.16b, v14.16b 4762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v2.4s, v12.4s 4772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v14.4s, ROW3R.4h, XFIX_0_899976223 /* ROW7L.4h <-> ROW3R.4h */ 4782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 4792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW1L.4h, v2.4s, #16 4802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 4812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v10.4s, ROW1R.4h, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L.4h <-> ROW1R.4h */ 4822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 4832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 4842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 4852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW2R.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */ 4862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v4.4s 4872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ 4882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v10.4s 4892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v10.4s 
4902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org saddl v10.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */ 4912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW2L.4h, v2.4s, #16 4922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ 4932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v10.4s, v10.4s, #13 4942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v8.4s, ROW3R.4h, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L.4h <-> ROW3R.4h */ 4952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4s, v10.4s, v12.4s 4962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v10.4s, v12.4s 4972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v4.4s, v14.4s 4982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v4.4s, v4.4s, v14.4s 4992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4s, v2.4s, v8.4s 5002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v2.4s, v8.4s 5012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ 5022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW3L.4h, v10.4s, #16 5032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW0L.4h, v12.4s, #16 5042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ 5052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 1-D IDCT, pass 2, right 4x8 half */ 5062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.4h}, [x15] /* reload constants */ 5072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW5R.4h, XFIX_1_175875602 5082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW5L.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */ 
5092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW7R.4h, XFIX_1_175875602_MINUS_1_961570560 5102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */ 5112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v14.4s, ROW7R.4h, XFIX_1_175875602 5122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW7L.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */ 5132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW5R.4h, XFIX_1_175875602_MINUS_0_390180644 5142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */ 5152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ssubl v6.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */ 5162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v4.4s, ROW6L.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */ 5172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065 5182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v8.16b, v12.16b 5192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v12.4s, ROW5R.4h, XFIX_2_562915447 5202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L.4h <-> ROW3R.4h */ 5212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v6.4s, v6.4s, #13 5222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 /* ROW5L.4h <-> ROW1R.4h */ 5232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v4.4s 5242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v10.16b, v14.16b 5252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v2.4s, 
v12.4s 5262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v14.4s, ROW7R.4h, XFIX_0_899976223 5272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L.4h <-> ROW1R.4h */ 5282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ 5292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 5302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447 5312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 /* ROW7L.4h <-> ROW3R.4h */ 5322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 5332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L.4h <-> ROW2R.4h */ 5342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW6R.4h, XFIX_0_541196100 5352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v4.4s 5362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW6R.4h, v2.4s, #16 5372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v10.4s 5382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v10.4s 5392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org saddl v10.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */ 5402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ 5412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW5R.4h, v6.4s, #16 5422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl v10.4s, v10.4s, #13 5432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223 
5442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4s, v10.4s, v12.4s 5452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v10.4s, v12.4s 5462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v4.4s, v14.4s 5472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v4.4s, v4.4s, v14.4s 5482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4s, v2.4s, v8.4s 5492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v2.4s, v8.4s 5502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW7R.4h, v4.4s, #16 5512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ 5522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ 5532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW4R.4h, v6.4s, #16 5542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 5552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org2: /* Descale to 8-bit and range limit */ 5562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v16.2d[1], v17.2d[0] 5572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v18.2d[1], v19.2d[0] 5582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v20.2d[1], v21.2d[0] 5592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v22.2d[1], v23.2d[0] 5602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn v16.8b, v16.8h, #2 5612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn2 v16.16b, v18.8h, #2 5622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn v18.8b, v20.8h, #2 5632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn2 v18.16b, v22.8h, #2 5642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 5652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 
vpop {v8.4h - d15.4h} */ /* restore NEON registers */ 5662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v8.4h - v11.4h}, [sp], 32 5672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v12.4h - v15.4h}, [sp], 32 5682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v24.2d[1], v25.2d[0] 5692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 5702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn v20.8b, v24.8h, #2 5712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose the final 8-bit samples and do signed->unsigned conversion */ 5722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* trn1 v16.8h, v16.8h, v18.8h */ 5732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose v16, v18, v3, .16b, .8h 5742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v26.2d[1], v27.2d[0] 5752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v28.2d[1], v29.2d[0] 5762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v30.2d[1], v31.2d[0] 5772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn2 v20.16b, v26.8h, #2 5782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn v22.8b, v28.8h, #2 5792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org movi v0.16b, #(CENTERJSAMPLE) 5802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqrshrn2 v22.16b, v30.8h, #2 5812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose_single v16, v17, v3, .2d, .8b 5822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose_single v18, v19, v3, .2d, .8b 5832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v16.8b, v16.8b, v0.8b 5842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v17.8b, v17.8b, v0.8b 5852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v18.8b, v18.8b, v0.8b 
5862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v19.8b, v19.8b, v0.8b 5872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose v20, v22, v3, .16b, .8h 5882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Store results to the output buffer */ 5892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF], 16 5902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 5912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 5922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v16.8b}, [TMP1] 5932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose_single v20, v21, v3, .2d, .8b 5942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v17.8b}, [TMP2] 5952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF], 16 5962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 5972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 5982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v18.8b}, [TMP1] 5992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v20.8b, v20.8b, v0.8b 6002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v21.8b, v21.8b, v0.8b 6012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v19.8b}, [TMP2] 6022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF], 16 6032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP3, TMP4, [OUTPUT_BUF] 6042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 6052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 6062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP3, TMP3, OUTPUT_COL 
6072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP4, TMP4, OUTPUT_COL 6082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose_single v22, v23, v3, .2d, .8b 6092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v20.8b}, [TMP1] 6102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v22.8b, v22.8b, v0.8b 6112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v23.8b, v23.8b, v0.8b 6122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v21.8b}, [TMP2] 6132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v22.8b}, [TMP3] 6142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v23.8b}, [TMP4] 6152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldr x15, [sp], 16 6162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8b - v3.8b}, [sp], 32 6172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.8b - v7.8b}, [sp], 32 6182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v8.8b - v11.8b}, [sp], 32 6192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v12.8b - v15.8b}, [sp], 32 6202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v16.8b - v19.8b}, [sp], 32 6212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v20.8b - v23.8b}, [sp], 32 6222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v24.8b - v27.8b}, [sp], 32 6232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v28.8b - v31.8b}, [sp], 32 6242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org blr x30 6252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 6262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */ 6272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 6282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose left 4x8 
half */ 6292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW6L, ROW7L, v3, .16b, .4h 6302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW2L, ROW3L, v3, .16b, .4h 6312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW0L, ROW1L, v3, .16b, .4h 6322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW4L, ROW5L, v3, .16b, .4h 6332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shl ROW0R.4h, ROW0R.4h, #2 /* PASS1_BITS */ 6342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW1L, ROW3L, v3, .16b, .2s 6352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW4L, ROW6L, v3, .16b, .2s 6362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW0L, ROW2L, v3, .16b, .2s 6372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose ROW5L, ROW7L, v3, .16b, .2s 6382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org cmp x0, #0 6392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */ 6402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 6412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Only row 0 is non-zero for the right 4x8 half */ 6422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW1R.4h, ROW0R.4h[1] 6432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW2R.4h, ROW0R.4h[2] 6442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW3R.4h, ROW0R.4h[3] 6452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW4R.4h, ROW0R.4h[0] 6462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW5R.4h, ROW0R.4h[1] 6472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW6R.4h, ROW0R.4h[2] 6482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW7R.4h, ROW0R.4h[3] 
6492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org dup ROW0R.4h, ROW0R.4h[0] 6502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org b 1b /* Go to 'normal' second pass */ 6512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 6522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */ 6532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.4h}, [x15] /* reload constants */ 6542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW1L.4h, XFIX_1_175875602 6552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560 6562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v14.4s, ROW3L.4h, XFIX_1_175875602 6572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644 6582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v4.4s, ROW2L.4h, XFIX_0_541196100 6592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sshll v6.4s, ROW0L.4h, #13 6602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v8.16b, v12.16b 6612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 6622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 6632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v4.4s 6642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v10.16b, v14.16b 6652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 6662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v2.4s, v12.4s 6672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v12.4s, v12.4s 
6682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 6692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW1L.4h, v2.4s, #16 6702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 6712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 6722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v4.4s 6732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ 6742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v10.4s 6752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v10.4s 6762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sshll v10.4s, ROW0L.4h, #13 6772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW2L.4h, v2.4s, #16 6782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ 6792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4s, v10.4s, v12.4s 6802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v10.4s, v12.4s 6812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v4.4s, v14.4s 6822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v4.4s, v4.4s, v14.4s 6832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4s, v2.4s, v8.4s 6842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v2.4s, v8.4s 6852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ 6862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW3L.4h, v10.4s, #16 6872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW0L.4h, v12.4s, #16 
6882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ 6892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */ 6902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.4h}, [x15] /* reload constants */ 6912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW5L.4h, XFIX_1_175875602 6922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 6932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v14.4s, ROW7L.4h, XFIX_1_175875602 6942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 6952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v4.4s, ROW6L.4h, XFIX_0_541196100 6962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sshll v6.4s, ROW4L.4h, #13 6972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v8.16b, v12.16b 6982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 6992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 7002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v4.4s 7012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v10.16b, v14.16b 7022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 7032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v2.4s, v12.4s 7042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v12.4s, v12.4s 7052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 7062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn 
ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ 7072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v2.4s, v12.4s 7082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 7092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v4.4s 7102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW6R.4h, v2.4s, #16 7112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.4s, v6.4s, v10.4s 7122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v6.4s, v10.4s 7132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sshll v10.4s, ROW4L.4h, #13 7142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ 7152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW5R.4h, v6.4s, #16 7162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v4.4s, v10.4s, v12.4s 7172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.4s, v10.4s, v12.4s 7182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.4s, v4.4s, v14.4s 7192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v4.4s, v4.4s, v14.4s 7202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.4s, v2.4s, v8.4s 7212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.4s, v2.4s, v8.4s 7222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW7R.4h, v4.4s, #16 7232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ 7242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ 7252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org shrn ROW4R.4h, v6.4s, #16 7262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org b 2b /* Go to epilogue */ 
7272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq DCT_TABLE 7292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq COEF_BLOCK 7302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq OUTPUT_BUF 7312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq OUTPUT_COL 7322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq TMP1 7332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq TMP2 7342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq TMP3 7352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq TMP4 7362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW0L 7382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW0R 7392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW1L 7402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW1R 7412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW2L 7422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW2R 7432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW3L 7442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW3R 7452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW4L 7462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW4R 7472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW5L 7482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW5R 7492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW6L 7502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW6R 7512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW7L 
7522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq ROW7R 7532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org/*****************************************************************************/ 7562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org/* 7582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_idct_ifast_neon 7592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 7602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * This function contains a fast, not so accurate integer implementation of 7612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * the inverse DCT (Discrete Cosine Transform). It uses the same calculations 7622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * and produces exactly the same output as IJG's original 'jpeg_idct_ifast' 7632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * function from jidctfst.c 7642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 7652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * Normally 1-D AAN DCT needs 5 multiplications and 29 additions. 7662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * But in ARM NEON case some extra additions are required because VQDMULH 7672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * instruction can't handle the constants larger than 1. So the expressions 7682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x", 7692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * which introduces an extra addition. 
Overall, there are 6 extra additions 7702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions. 7712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org */ 7722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#define XFIX_1_082392200 v0.4h[0] 7742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#define XFIX_1_414213562 v0.4h[1] 7752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#define XFIX_1_847759065 v0.4h[2] 7762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#define XFIX_2_613125930 v0.4h[3] 7772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.balign 16 7792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.orgjsimd_idct_ifast_neon_consts: 7802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ 7812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ 7822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ 7832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ 7842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.orgasm_function jsimd_idct_ifast_neon 7862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org DCT_TABLE .req x0 7882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org COEF_BLOCK .req x1 7892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org OUTPUT_BUF .req x2 7902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org OUTPUT_COL .req x3 
7912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP1 .req x0 7922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP2 .req x1 7932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP3 .req x2 7942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP4 .req x22 7952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org TMP5 .req x23 7962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 7972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Load and dequantize coefficients into NEON registers 7982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * with the following allocation: 7992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 0 1 2 3 | 4 5 6 7 8002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * ---------+-------- 8012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 0 | d16 | d17 ( v8.8h ) 8022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 1 | d18 | d19 ( v9.8h ) 8032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 2 | d20 | d21 ( v10.8h ) 8042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 3 | d22 | d23 ( v11.8h ) 8052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 4 | d24 | d25 ( v12.8h ) 8062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 5 | d26 | d27 ( v13.8h ) 8072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 6 | d28 | d29 ( v14.8h ) 8082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 7 | d30 | d31 ( v15.8h ) 8092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org */ 8102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Save NEON registers used in fast IDCT */ 8112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub sp, sp, #176 8122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org stp x22, x23, [sp], 16 8132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org adr 
x23, jsimd_idct_ifast_neon_consts 8142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v0.8b - v3.8b}, [sp], 32 8152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v4.8b - v7.8b}, [sp], 32 8162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v8.8b - v11.8b}, [sp], 32 8172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v12.8b - v15.8b}, [sp], 32 8182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v16.8b - v19.8b}, [sp], 32 8192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32 8202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 8212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32 8222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v8.8h, v8.8h, v0.8h 8232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 8242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v9.8h, v9.8h, v1.8h 8252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v12.8h, v13.8h}, [COEF_BLOCK], 32 8262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v10.8h, v10.8h, v2.8h 8272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 8282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v11.8h, v11.8h, v3.8h 8292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v14.8h, v15.8h}, [COEF_BLOCK], 32 8302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v12.8h, v12.8h, v0.8h 8312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 8322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v14.8h, v14.8h, v2.8h 8332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v13.8h, v13.8h, v1.8h 
8342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.4h}, [x23] /* load constants */ 8352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v15.8h, v15.8h, v3.8h 8362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 8372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 1-D IDCT, pass 1 */ 8382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.8h, v10.8h, v14.8h 8392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v14.8h, v10.8h, v14.8h 8402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v1.8h, v11.8h, v13.8h 8412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v13.8h, v11.8h, v13.8h 8422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v5.8h, v9.8h, v15.8h 8432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v15.8h, v9.8h, v15.8h 8442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v4.8h, v2.8h, XFIX_1_414213562 8452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v6.8h, v1.8h, XFIX_2_613125930 8462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v3.8h, v1.8h, v1.8h 8472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v1.8h, v5.8h, v1.8h 8482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.8h, v2.8h, v4.8h 8492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v4.8h, v1.8h, XFIX_1_847759065 8502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.8h, v15.8h, v13.8h 8512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v3.8h, v3.8h, v6.8h 8522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v6.8h, v2.8h, XFIX_1_414213562 8532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v1.8h, v1.8h, v4.8h 8542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v4.8h, v5.8h, XFIX_1_082392200 
8552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v10.8h, v10.8h, v14.8h 8562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.8h, v2.8h, v6.8h 8572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.8h, v8.8h, v12.8h 8582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.8h, v8.8h, v12.8h 8592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v9.8h, v5.8h, v4.8h 8602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v5.8h, v6.8h, v10.8h 8612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v10.8h, v6.8h, v10.8h 8622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v6.8h, v15.8h, v13.8h 8632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v8.8h, v12.8h, v14.8h 8642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v3.8h, v6.8h, v3.8h 8652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v12.8h, v12.8h, v14.8h 8662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v3.8h, v3.8h, v1.8h 8672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v1.8h, v9.8h, v1.8h 8682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.8h, v3.8h, v2.8h 8692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v15.8h, v8.8h, v6.8h 8702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v1.8h, v1.8h, v2.8h 8712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v8.8h, v8.8h, v6.8h 8722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v14.8h, v5.8h, v3.8h 8732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v9.8h, v5.8h, v3.8h 8742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v13.8h, v10.8h, v2.8h 8752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.8h, v10.8h, v2.8h 8762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q8-q9 */ 
8772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v8.16b 8782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v8.8h, v8.8h, v9.8h 8792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v9.8h, v18.8h, v9.8h 8802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v11.8h, v12.8h, v1.8h 8812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q14-q15 */ 8822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v14.16b 8832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v14.8h, v14.8h, v15.8h 8842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v15.8h, v18.8h, v15.8h 8852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.8h, v12.8h, v1.8h 8862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q10-q11 */ 8872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v10.16b 8882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v10.8h, v10.8h, v11.8h 8892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v11.8h, v18.8h, v11.8h 8902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q12-q13 */ 8912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v12.16b 8922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v12.8h, v12.8h, v13.8h 8932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v13.8h, v18.8h, v13.8h 8942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q9-q11 */ 8952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v9.16b 8962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v9.4s, v9.4s, v11.4s 8972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v11.4s, v18.4s, v11.4s 8982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q12-q14 */ 
8992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v12.16b 9002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v12.4s, v12.4s, v14.4s 9012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v14.4s, v18.4s, v14.4s 9022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q8-q10 */ 9032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v8.16b 9042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v8.4s, v8.4s, v10.4s 9052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v10.4s, v18.4s, v10.4s 9062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q13-q15 */ 9072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v13.16b 9082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v13.4s, v13.4s, v15.4s 9092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v15.4s, v18.4s, v15.4s 9102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* vswp v14.4h, v10-MSB.4h */ 9112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org umov x22, v14.d[0] 9122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v14.2d[0], v10.2d[1] 9132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v10.2d[1], x22 9142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* vswp v13.4h, v9MSB.4h */ 9152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 9162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org umov x22, v13.d[0] 9172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v13.2d[0], v9.2d[1] 9182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v9.2d[1], x22 9192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* 1-D IDCT, pass 2 */ 9202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.8h, v10.8h, v14.8h 
9212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* vswp v15.4h, v11MSB.4h */ 9222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org umov x22, v15.d[0] 9232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v15.2d[0], v11.2d[1] 9242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v11.2d[1], x22 9252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v14.8h, v10.8h, v14.8h 9262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* vswp v12.4h, v8-MSB.4h */ 9272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org umov x22, v12.d[0] 9282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v12.2d[0], v8.2d[1] 9292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v8.2d[1], x22 9302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v1.8h, v11.8h, v13.8h 9312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v13.8h, v11.8h, v13.8h 9322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v5.8h, v9.8h, v15.8h 9332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v15.8h, v9.8h, v15.8h 9342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v4.8h, v2.8h, XFIX_1_414213562 9352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v6.8h, v1.8h, XFIX_2_613125930 9362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v3.8h, v1.8h, v1.8h 9372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v1.8h, v5.8h, v1.8h 9382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.8h, v2.8h, v4.8h 9392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v4.8h, v1.8h, XFIX_1_847759065 9402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v2.8h, v15.8h, v13.8h 9412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v3.8h, v3.8h, v6.8h 9422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v6.8h, 
v2.8h, XFIX_1_414213562 9432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v1.8h, v1.8h, v4.8h 9442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqdmulh v4.8h, v5.8h, XFIX_1_082392200 9452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v10.8h, v10.8h, v14.8h 9462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.8h, v2.8h, v6.8h 9472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v6.8h, v8.8h, v12.8h 9482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.8h, v8.8h, v12.8h 9492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v9.8h, v5.8h, v4.8h 9502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v5.8h, v6.8h, v10.8h 9512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v10.8h, v6.8h, v10.8h 9522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v6.8h, v15.8h, v13.8h 9532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v8.8h, v12.8h, v14.8h 9542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v3.8h, v6.8h, v3.8h 9552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v12.8h, v12.8h, v14.8h 9562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v3.8h, v3.8h, v1.8h 9572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v1.8h, v9.8h, v1.8h 9582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v2.8h, v3.8h, v2.8h 9592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v15.8h, v8.8h, v6.8h 9602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v1.8h, v1.8h, v2.8h 9612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v8.8h, v8.8h, v6.8h 9622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v14.8h, v5.8h, v3.8h 9632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v9.8h, v5.8h, v3.8h 9642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub 
v13.8h, v10.8h, v2.8h 9652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.8h, v10.8h, v2.8h 9662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v11.8h, v12.8h, v1.8h 9672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v12.8h, v12.8h, v1.8h 9682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Descale to 8-bit and range limit */ 9692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org movi v0.16b, #0x80 9702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn v8.8b, v8.8h, #5 9712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn2 v8.16b, v9.8h, #5 9722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn v9.8b, v10.8h, #5 9732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn2 v9.16b, v11.8h, #5 9742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn v10.8b, v12.8h, #5 9752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn2 v10.16b, v13.8h, #5 9762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn v11.8b, v14.8h, #5 9772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshrn2 v11.16b, v15.8h, #5 9782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v8.16b, v8.16b, v0.16b 9792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v9.16b, v9.16b, v0.16b 9802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v10.16b, v10.16b, v0.16b 9812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v11.16b, v11.16b, v0.16b 9822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose the final 8-bit samples */ 9832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q8-q9 */ 9842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v8.16b 9852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v8.8h, v8.8h, v9.8h 
9862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v9.8h, v18.8h, v9.8h 9872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q10-q11 */ 9882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v10.16b 9892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v10.8h, v10.8h, v11.8h 9902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v11.8h, v18.8h, v11.8h 9912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q8-q10 */ 9922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v8.16b 9932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v8.4s, v8.4s, v10.4s 9942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v10.4s, v18.4s, v10.4s 9952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose q9-q11 */ 9962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v9.16b 9972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v9.4s, v9.4s, v11.4s 9982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v11.4s, v18.4s, v11.4s 9992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* make copy */ 10002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v17.2d[0], v8.2d[1] 10012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Transpose d16-d17-msb */ 10022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v8.16b 10032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v8.8b, v8.8b, v17.8b 10042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v17.8b, v18.8b, v17.8b 10052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* make copy */ 10062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v19.2d[0], v9.2d[1] 10072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v9.16b 
10082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v9.8b, v9.8b, v19.8b 10092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v19.8b, v18.8b, v19.8b 10102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Store results to the output buffer */ 10112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF], 16 10122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 10132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 10142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v8.8b}, [TMP1] 10152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v17.8b}, [TMP2] 10162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF], 16 10172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 10182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 10192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v9.8b}, [TMP1] 10202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* make copy */ 10212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v7.2d[0], v10.2d[1] 10222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v10.16b 10232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v10.8b, v10.8b, v7.8b 10242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v7.8b, v18.8b, v7.8b 10252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v19.8b}, [TMP2] 10262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF], 16 10272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP4, TMP5, [OUTPUT_BUF], 16 10282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 
10292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 10302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP4, TMP4, OUTPUT_COL 10312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP5, TMP5, OUTPUT_COL 10322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v10.8b}, [TMP1] 10332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* make copy */ 10342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v16.2d[0], v11.2d[1] 10352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mov v18.16b, v11.16b 10362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn1 v11.8b, v11.8b, v16.8b 10372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org trn2 v16.8b, v18.8b, v16.8b 10382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v7.8b}, [TMP2] 10392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v11.8b}, [TMP4] 10402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v16.8b}, [TMP5] 10412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub sp, sp, #176 10422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp x22, x23, [sp], 16 10432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8b - v3.8b}, [sp], 32 10442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.8b - v7.8b}, [sp], 32 10452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v8.8b - v11.8b}, [sp], 32 10462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v12.8b - v15.8b}, [sp], 32 10472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v16.8b - v19.8b}, [sp], 32 10482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org blr x30 10492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 10502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq DCT_TABLE 
.unreq          COEF_BLOCK
.unreq          OUTPUT_BUF
.unreq          OUTPUT_COL
.unreq          TMP1
.unreq          TMP2
.unreq          TMP3
.unreq          TMP4


/*****************************************************************************/

/*
 * jsimd_idct_4x4_neon
 *
 * This function contains inverse-DCT code for getting reduced-size
 * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations
 * and produces exactly the same output as IJG's original 'jpeg_idct_4x4'
 * function from jpeg-6b (jidctred.c).
 *
 * Register arguments (see the .req aliases in the function body):
 *   x0 - DCT_TABLE   dequantization multipliers, 16-bit entries
 *   x1 - COEF_BLOCK  coefficient block, 16-bit entries
 *   x2 - OUTPUT_BUF  array of output row pointers (the first 4 are read)
 *   x3 - OUTPUT_COL  byte offset added to every row pointer
 *
 * Row 4 of the coefficient block and of the dequantization table is skipped
 * (it is never loaded); columns 0 and 4 never contribute to the result.
 *
 * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which
 *       requires far fewer arithmetic operations and hence should be faster.
 *       The primary purpose of this particular NEON optimized function is
 *       bit exact compatibility with jpeg-6b.
 *
 * TODO: slightly better instruction scheduling can be achieved by expanding
 *       the idct_helper/transpose_4x4 macros and reordering instructions,
 *       but readability will suffer somewhat.
 */

#define CONST_BITS  13

#define FIX_0_211164243  (1730)  /* FIX(0.211164243) */
#define FIX_0_509795579  (4176)  /* FIX(0.509795579) */
#define FIX_0_601344887  (4926)  /* FIX(0.601344887) */
#define FIX_0_720959822  (5906)  /* FIX(0.720959822) */
#define FIX_0_765366865  (6270)  /* FIX(0.765366865) */
#define FIX_0_850430095  (6967)  /* FIX(0.850430095) */
#define FIX_0_899976223  (7373)  /* FIX(0.899976223) */
#define FIX_1_061594337  (8697)  /* FIX(1.061594337) */
#define FIX_1_272758580  (10426) /* FIX(1.272758580) */
#define FIX_1_451774981  (11893) /* FIX(1.451774981) */
#define FIX_1_847759065  (15137) /* FIX(1.847759065) */
#define FIX_2_172734803  (17799) /* FIX(2.172734803) */
#define FIX_2_562915447  (20995) /* FIX(2.562915447) */
#define FIX_3_624509785  (29692) /* FIX(3.624509785) */

/* Multiplier table, loaded into v0-v2 by jsimd_idct_4x4_neon. */
.balign 16
jsimd_idct_4x4_neon_consts:
    .short          FIX_1_847759065       /* v0.h[0] */
    .short          -FIX_0_765366865      /* v0.h[1] */
    .short          -FIX_0_211164243      /* v0.h[2] */
    .short          FIX_1_451774981       /* v0.h[3] */
    .short          -FIX_2_172734803      /* v1.h[0] */
    .short          FIX_1_061594337       /* v1.h[1] */
    .short          -FIX_0_509795579      /* v1.h[2] */
    .short          -FIX_0_601344887      /* v1.h[3] */
    .short          FIX_0_899976223       /* v2.h[0] */
    .short          FIX_2_562915447       /* v2.h[1] */
    .short          1 << (CONST_BITS+1)   /* v2.h[2] */
    .short          0                     /* v2.h[3] */

/*
 * idct_helper: one 1-D pass of the reduced 4x4 IDCT over four lanes.
 *
 *   x4 ... x16   input vectors (4 x 16-bit), named after the ARMv7 d-register
 *                the value used to live in
 *   shift        rounding right shift applied to the 32-bit sums
 *   y26 ... y29  output vectors (4 x 16-bit)
 *
 * Expects the multipliers from jsimd_idct_4x4_neon_consts in v0-v2.
 * Clobbers v20, v24, v26, v28 and v30.  Outputs are written only after all
 * inputs have been consumed, so outputs may alias inputs.
 */
.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
    /* even part, sum: v28 = x4 << (CONST_BITS+1) + x8 * c + x14 * c */
    smull           v28.4s, \x4,    v2.h[2]
    smlal           v28.4s, \x8,    v0.h[0]
    smlal           v28.4s, \x14,   v0.h[1]

    /* odd part feeding outputs y26 / y29 */
    smull           v26.4s, \x16,   v1.h[2]
    smlal           v26.4s, \x12,   v1.h[3]
    smlal           v26.4s, \x10,   v2.h[0]
    smlal           v26.4s, \x6,    v2.h[1]

    /* even part, difference: v30 = x4 << (CONST_BITS+1) - x8 * c - x14 * c */
    smull           v30.4s, \x4,    v2.h[2]
    smlsl           v30.4s, \x8,    v0.h[0]
    smlsl           v30.4s, \x14,   v0.h[1]

    /* odd part feeding outputs y27 / y28 */
    smull           v24.4s, \x16,   v0.h[2]
    smlal           v24.4s, \x12,   v0.h[3]
    smlal           v24.4s, \x10,   v1.h[0]
    smlal           v24.4s, \x6,    v1.h[1]

    add             v20.4s, v28.4s, v26.4s
    sub             v28.4s, v28.4s, v26.4s

    /* rshrn only encodes shifts up to 16 for 32->16 bit narrowing, so larger
     * shifts are done as a rounding shift followed by a plain narrow. */
.if \shift > 16
    srshr           v20.4s, v20.4s, #\shift
    srshr           v28.4s, v28.4s, #\shift
    xtn             \y26,   v20.4s
    xtn             \y29,   v28.4s
.else
    rshrn           \y26,   v20.4s, #\shift
    rshrn           \y29,   v28.4s, #\shift
.endif

    add             v20.4s, v30.4s, v24.4s
    sub             v30.4s, v30.4s, v24.4s

.if \shift > 16
    srshr           v20.4s, v20.4s, #\shift
    srshr           v30.4s, v30.4s, #\shift
    xtn             \y27,   v20.4s
    xtn             \y28,   v30.4s
.else
    rshrn           \y27,   v20.4s, #\shift
    rshrn           \y28,   v30.4s, #\shift
.endif

.endm

asm_function jsimd_idct_4x4_neon

    DCT_TABLE       .req x0
    COEF_BLOCK      .req x1
    OUTPUT_BUF      .req x2
    OUTPUT_COL      .req x3
    TMP1            .req x0
    TMP2            .req x1
    TMP3            .req x2
    TMP4            .req x15

    /* Save x15 and the low 64 bits of every NEON register.
     *
     * Frame layout (272 bytes): [sp] = x15, [sp + 16 ...] = v0 - v31.
     * sp stays below the save area for the whole function, so nothing that
     * runs asynchronously on this stack (e.g. a signal handler) can
     * overwrite the saved values.  TMP4 (x15) is used as the store cursor
     * once its own value has been saved.
     */
    sub             sp, sp, #272
    str             x15, [sp]
    add             TMP4, sp, #16
    st1             {v0.8b - v3.8b}, [TMP4], 32
    st1             {v4.8b - v7.8b}, [TMP4], 32
    st1             {v8.8b - v11.8b}, [TMP4], 32
    st1             {v12.8b - v15.8b}, [TMP4], 32
    st1             {v16.8b - v19.8b}, [TMP4], 32
    st1             {v20.8b - v23.8b}, [TMP4], 32
    st1             {v24.8b - v27.8b}, [TMP4], 32
    st1             {v28.8b - v31.8b}, [TMP4], 32

    /* Load constants (v3.4h is just used for padding) */
    adr             TMP4, jsimd_idct_4x4_neon_consts
    ld1             {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]

    /* Load all COEF_BLOCK into NEON registers with the following allocation:
     *       0 1 2 3 | 4 5 6 7
     *      ---------+--------
     *   0 | v4.4h   | v5.4h
     *   1 | v6.4h   | v7.4h
     *   2 | v8.4h   | v9.4h
     *   3 | v10.4h  | v11.4h
     *   4 | -       | -
     *   5 | v12.4h  | v13.4h
     *   6 | v14.4h  | v15.4h
     *   7 | v16.4h  | v17.4h
     */
    ld1             {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
    ld1             {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32
    add             COEF_BLOCK, COEF_BLOCK, #16     /* skip row 4 */
    ld1             {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32
    ld1             {v16.4h, v17.4h}, [COEF_BLOCK], 16
    /* dequantize */
    ld1             {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
    mul             v4.4h, v4.4h, v18.4h
    mul             v5.4h, v5.4h, v19.4h
    ins             v4.d[1], v5.d[0]      /* 128 bit q4 */
    ld1             {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32
    mul             v6.4h, v6.4h, v20.4h
    mul             v7.4h, v7.4h, v21.4h
    ins             v6.d[1], v7.d[0]      /* 128 bit q6 */
    mul             v8.4h, v8.4h, v22.4h
    mul             v9.4h, v9.4h, v23.4h
    ins             v8.d[1], v9.d[0]      /* 128 bit q8 */
    add             DCT_TABLE, DCT_TABLE, #16       /* skip row 4 */
    ld1             {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32
    mul             v10.4h, v10.4h, v24.4h
    mul             v11.4h, v11.4h, v25.4h
    ins             v10.d[1], v11.d[0]    /* 128 bit q10 */
    mul             v12.4h, v12.4h, v26.4h
    mul             v13.4h, v13.4h, v27.4h
    ins             v12.d[1], v13.d[0]    /* 128 bit q12 */
    ld1             {v30.4h, v31.4h}, [DCT_TABLE], 16
    mul             v14.4h, v14.4h, v28.4h
    mul             v15.4h, v15.4h, v29.4h
    ins             v14.d[1], v15.d[0]    /* 128 bit q14 */
    mul             v16.4h, v16.4h, v30.4h
    mul             v17.4h, v17.4h, v31.4h
    ins             v16.d[1], v17.d[0]    /* 128 bit q16 */

    /* Pass 1 */
    idct_helper     v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h
    transpose_4x4   v4, v6, v8, v10, v3
    ins             v10.d[1], v11.d[0]
    idct_helper     v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h
    transpose_4x4   v5, v7, v9, v11, v3
    ins             v10.d[1], v11.d[0]
    /* Pass 2 */
    idct_helper     v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h
    transpose_4x4   v26, v27, v28, v29, v3

    /* Range limit: add the 0x80 level shift, then saturate to unsigned 8 bit */
    movi            v30.8h, #0x80
    ins             v26.d[1], v27.d[0]
    ins             v28.d[1], v29.d[0]
    add             v26.8h, v26.8h, v30.8h
    add             v28.8h, v28.8h, v30.8h
    sqxtun          v26.8b, v26.8h
    sqxtun          v27.8b, v28.8h

    /* Store results to the output buffer.
     * TMP1..TMP4 become the four destination row pointers; v26 carries the
     * pixels for TMP1 (bytes 0-3) and TMP2 (bytes 4-7), v27 those for TMP3
     * (bytes 0-3) and TMP4 (bytes 4-7).
     */
    ldp             TMP1, TMP2, [OUTPUT_BUF], 16
    ldp             TMP3, TMP4, [OUTPUT_BUF]
    add             TMP1, TMP1, OUTPUT_COL
    add             TMP2, TMP2, OUTPUT_COL
    add             TMP3, TMP3, OUTPUT_COL
    add             TMP4, TMP4, OUTPUT_COL

    /* NOTE(review): AArch64 compilers appear to predefine __AARCH64EL__
     * rather than __ARMEL__, so the first branch is presumably never
     * selected even with RESPECT_STRICT_ALIGNMENT set to 0 - confirm. */
#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT
    /* We can use far fewer instructions on little endian systems if the
     * OS kernel is not configured to trap unaligned memory accesses
     */
    st1             {v26.s}[0], [TMP1], 4
    st1             {v27.s}[0], [TMP3], 4
    st1             {v26.s}[1], [TMP2], 4
    st1             {v27.s}[1], [TMP4], 4
#else
    st1             {v26.b}[0], [TMP1], 1
    st1             {v27.b}[0], [TMP3], 1
    st1             {v26.b}[1], [TMP1], 1
    st1             {v27.b}[1], [TMP3], 1
    st1             {v26.b}[2], [TMP1], 1
    st1             {v27.b}[2], [TMP3], 1
    st1             {v26.b}[3], [TMP1], 1
    st1             {v27.b}[3], [TMP3], 1

    st1             {v26.b}[4], [TMP2], 1
    st1             {v27.b}[4], [TMP4], 1
    st1             {v26.b}[5], [TMP2], 1
    st1             {v27.b}[5], [TMP4], 1
    st1             {v26.b}[6], [TMP2], 1
    st1             {v27.b}[6], [TMP4], 1
    st1             {v26.b}[7], [TMP2], 1
    st1             {v27.b}[7], [TMP4], 1
#endif

    /* Restore the saved registers, release the frame and return.
     * TMP4 (x15) is free again here and serves as the load cursor; x15
     * itself is reloaded last.  The frame is popped with a separate add
     * because 272 does not fit the post-index immediate of ldr.
     */
    add             TMP4, sp, #16
    ld1             {v0.8b - v3.8b}, [TMP4], 32
    ld1             {v4.8b - v7.8b}, [TMP4], 32
    ld1             {v8.8b - v11.8b}, [TMP4], 32
    ld1             {v12.8b - v15.8b}, [TMP4], 32
    ld1             {v16.8b - v19.8b}, [TMP4], 32
    ld1             {v20.8b - v23.8b}, [TMP4], 32
    ld1             {v24.8b - v27.8b}, [TMP4], 32
    ld1             {v28.8b - v31.8b}, [TMP4], 32
    ldr             x15, [sp]
    add             sp, sp, #272
    ret

    .unreq          DCT_TABLE
    .unreq          COEF_BLOCK
    .unreq          OUTPUT_BUF
    .unreq          OUTPUT_COL
    .unreq          TMP1
    .unreq          TMP2
    .unreq          TMP3
    .unreq          TMP4

.purgem idct_helper


/*****************************************************************************/

/*
 * jsimd_idct_2x2_neon
 *
 * This function contains inverse-DCT code for getting reduced-size
 * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations
 * and produces exactly the same output as IJG's original 'jpeg_idct_2x2'
 * function from jpeg-6b (jidctred.c).
 *
 * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which
 *       requires much less arithmetic operations and hence should be faster.
*       The primary purpose of this particular NEON optimized function is
*       bit exact compatibility with jpeg-6b.
*/

/* Multiplier table, loaded into v14 by jsimd_idct_2x2_neon. */
.balign 8
jsimd_idct_2x2_neon_consts:
    .short          -FIX_0_720959822      /* v14.h[0] */
    .short          FIX_0_850430095       /* v14.h[1] */
    .short          -FIX_1_272758580      /* v14.h[2] */
    .short          FIX_3_624509785       /* v14.h[3] */

/*
 * idct_helper: one 1-D pass of the reduced 2x2 IDCT over four lanes.
 *
 *   x4           input vector (4 x 16-bit) that is scaled by 1 << 15
 *   x6 ... x16   input vectors (4 x 16-bit) multiplied by v14.h[3] ... v14.h[0]
 *   shift        rounding right shift applied to the 32-bit results
 *   y26          output: narrowed (x4 term + weighted sum)
 *   y27          output: narrowed (x4 term - weighted sum)
 *
 * Expects the multipliers from jsimd_idct_2x2_neon_consts in v14.
 * Clobbers v15, v20 and v26.
 */
.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
    sshll           v15.4s, \x4,    #15
    smull           v26.4s, \x6,    v14.h[3]
    smlal           v26.4s, \x10,   v14.h[2]
    smlal           v26.4s, \x12,   v14.h[1]
    smlal           v26.4s, \x16,   v14.h[0]

    add             v20.4s, v15.4s, v26.4s
    sub             v15.4s, v15.4s, v26.4s

    /* rshrn only encodes shifts up to 16 for 32->16 bit narrowing, so larger
     * shifts are done as a rounding shift followed by a plain narrow. */
.if \shift > 16
    srshr           v20.4s, v20.4s, #\shift
    srshr           v15.4s, v15.4s, #\shift
    xtn             \y26,   v20.4s
    xtn             \y27,   v15.4s
.else
    rshrn           \y26,   v20.4s, #\shift
    rshrn           \y27,   v15.4s, #\shift
.endif

.endm

asm_function jsimd_idct_2x2_neon

    DCT_TABLE       .req x0
    COEF_BLOCK      .req x1
    OUTPUT_BUF      .req x2
    OUTPUT_COL      .req x3
    TMP1            .req x0
    TMP2            .req x15

    /* vpush {v8.4h - v15.4h} ; not available */
    /* NOTE(review): the post-indexed stores that begin here walk sp back up
     * while saving, which would leave the save area below sp for the rest of
     * the function - confirm against the full prologue/epilogue. */
    sub             sp, sp, 208
    str             x15, [sp], 16
13682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 13692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Load constants */ 13702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org adr TMP2, jsimd_idct_2x2_neon_consts 13712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v4.8b - v7.8b}, [sp], 32 13722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v8.8b - v11.8b}, [sp], 32 13732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v12.8b - v15.8b}, [sp], 32 13742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v16.8b - v19.8b}, [sp], 32 13752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v21.8b - v22.8b}, [sp], 16 13762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v24.8b - v27.8b}, [sp], 32 13772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v30.8b - v31.8b}, [sp], 16 13782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v14.4h}, [TMP2] 13792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 13802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Load all COEF_BLOCK into NEON registers with the following allocation: 13812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 0 1 2 3 | 4 5 6 7 13822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * ---------+-------- 13832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 0 | v4.4h | v5.4h 13842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 1 | v6.4h | v7.4h 13852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 2 | - | - 13862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 3 | v10.4h | v11.4h 13872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 4 | - | - 13882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 5 | v12.4h | v13.4h 13892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 6 | - | - 
13902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 7 | v16.4h | v17.4h 13912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org */ 13922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 13932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add COEF_BLOCK, COEF_BLOCK, #16 13942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16 13952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add COEF_BLOCK, COEF_BLOCK, #16 13962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16 13972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add COEF_BLOCK, COEF_BLOCK, #16 13982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 13992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Dequantize */ 14002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 14012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v4.4h, v4.4h, v18.4h 14022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v5.4h, v5.4h, v19.4h 14032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v4.2d[1], v5.2d[0] 14042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v6.4h, v6.4h, v20.4h 14052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v7.4h, v7.4h, v21.4h 14062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v6.2d[1], v7.2d[0] 14072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add DCT_TABLE, DCT_TABLE, #16 14082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16 14092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v10.4h, v10.4h, v24.4h 14102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v11.4h, 
v11.4h, v25.4h 14112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v10.2d[1], v11.2d[0] 14122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add DCT_TABLE, DCT_TABLE, #16 14132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16 14142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v12.4h, v12.4h, v26.4h 14152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v13.4h, v13.4h, v27.4h 14162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v12.2d[1], v13.2d[0] 14172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add DCT_TABLE, DCT_TABLE, #16 14182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 14192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v16.4h, v16.4h, v30.4h 14202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org mul v17.4h, v17.4h, v31.4h 14212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v16.2d[1], v17.2d[0] 14222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Pass 1 */ 14242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#if 0 14252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h 14262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h 14272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h 14282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h 14292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#else 14302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v26.4s, v6.4h, v14.4h[3] 14312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal 
v26.4s, v10.4h, v14.4h[2] 14322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v26.4s, v12.4h, v14.4h[1] 14332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v26.4s, v16.4h, v14.4h[0] 14342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v24.4s, v7.4h, v14.4h[3] 14352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v24.4s, v11.4h, v14.4h[2] 14362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v24.4s, v13.4h, v14.4h[1] 14372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v24.4s, v17.4h, v14.4h[0] 14382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sshll v15.4s, v4.4h, #15 14392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sshll v30.4s, v5.4h, #15 14402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v20.4s, v15.4s, v26.4s 14412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v15.4s, v15.4s, v26.4s 14422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v4.4h, v20.4s, #13 14432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v6.4h, v15.4s, #13 14442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v20.4s, v30.4s, v24.4s 14452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub v15.4s, v30.4s, v24.4s 14462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v5.4h, v20.4s, #13 14472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v7.4h, v15.4s, #13 14482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v4.2d[1], v5.2d[0] 14492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v6.2d[1], v7.2d[0] 14502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose v4, v6, v3, .16b, .8h 14512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org transpose v6, v10, v3, .16b, .4s 14522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v11.2d[0], v10.2d[1] 
14532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v7.2d[0], v6.2d[1] 14542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org#endif 14552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Pass 2 */ 14572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h 14582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Range limit */ 14602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org movi v30.8h, #0x80 14612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v26.2d[1], v27.2d[0] 14622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add v26.8h, v26.8h, v30.8h 14632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v30.8b, v26.8h 14642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ins v26.2d[0], v30.2d[0] 14652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v27.8b, v26.8h 14662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org /* Store results to the output buffer */ 14682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldp TMP1, TMP2, [OUTPUT_BUF] 14692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP1, TMP1, OUTPUT_COL 14702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org add TMP2, TMP2, OUTPUT_COL 14712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v26.b}[0], [TMP1], 1 14732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v27.b}[4], [TMP1], 1 14742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v26.b}[1], [TMP2], 1 14752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 
{v27.b}[5], [TMP2], 1 14762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sub sp, sp, #208 14782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ldr x15, [sp], 16 14792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.8b - v7.8b}, [sp], 32 14802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v8.8b - v11.8b}, [sp], 32 14812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v12.8b - v15.8b}, [sp], 32 14822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v16.8b - v19.8b}, [sp], 32 14832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v21.8b - v22.8b}, [sp], 16 14842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v24.8b - v27.8b}, [sp], 32 14852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v30.8b - v31.8b}, [sp], 16 14862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org blr x30 14872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq DCT_TABLE 14892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq COEF_BLOCK 14902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq OUTPUT_BUF 14912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq OUTPUT_COL 14922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq TMP1 14932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .unreq TMP2 14942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.purgem idct_helper 14962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 14982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org/*****************************************************************************/ 
14992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 15002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org/* 15012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_ycc_extrgb_convert_neon 15022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_ycc_extbgr_convert_neon 15032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_ycc_extrgbx_convert_neon 15042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_ycc_extbgrx_convert_neon 15052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_ycc_extxbgr_convert_neon 15062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * jsimd_ycc_extxrgb_convert_neon 15072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 15082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * Colorspace conversion YCbCr -> RGB 15092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org */ 15102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 15112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 15122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro do_load size 15132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .if \size == 8 15142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.8b}, [U], 8 15152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.8b}, [V], 8 15162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8b}, [Y], 8 15172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [U, #64] 15182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [V, #64] 15192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [Y, #64] 15202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 4 15212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[0], [U], 1 
15222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[1], [U], 1 15232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[2], [U], 1 15242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[3], [U], 1 15252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[0], [V], 1 15262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[1], [V], 1 15272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[2], [V], 1 15282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[3], [V], 1 15292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[0], [Y], 1 15302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[1], [Y], 1 15312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[2], [Y], 1 15322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[3], [Y], 1 15332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 2 15342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[4], [U], 1 15352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[5], [U], 1 15362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[4], [V], 1 15372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[5], [V], 1 15382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[4], [Y], 1 15392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[5], [Y], 1 15402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 1 15412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.b}[6], [U], 1 15422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.b}[6], [V], 1 15432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.b}[6], [Y], 1 15442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 
.else 15452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .error unsupported macroblock size 15462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .endif 15472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endm 15482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 15492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro do_store bpp, size 15502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .if \bpp == 24 15512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .if \size == 8 15522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.8b, v11.8b, v12.8b}, [RGB], 24 15532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 4 15542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[0], [RGB], 3 15552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[1], [RGB], 3 15562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[2], [RGB], 3 15572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[3], [RGB], 3 15582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 2 15592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[4], [RGB], 3 15602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[5], [RGB], 3 15612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 1 15622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st3 {v10.b, v11.b, v12.b}[6], [RGB], 3 15632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .else 15642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .error unsupported macroblock size 15652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .endif 15662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \bpp == 32 
15672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .if \size == 8 15682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32 15692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 4 15702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4 15712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4 15722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4 15732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4 15742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 2 15752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4 15762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4 15772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 1 15782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4 15792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .else 15802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .error unsupported macroblock size 15812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .endif 15822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \bpp==16 15832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .if \size == 8 15842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v25.8h}, [RGB],16 15852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 4 15862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v25.4h}, [RGB],8 15872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 2 
15882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v25.h}[4], [RGB],2 15892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v25.h}[5], [RGB],2 15902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .elseif \size == 1 15912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org st1 {v25.h}[6], [RGB],2 15922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .else 15932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .error unsupported macroblock size 15942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .endif 15952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .else 15962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .error unsupported bpp 15972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org .endif 15982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endm 15992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize, defsize 16012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16022ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org/* 16032ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * 2-stage pipelined YCbCr->RGB conversion 16042ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org */ 16052ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16062ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro do_yuv_to_rgb_stage1 16072ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */ 16082ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ 16092ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ 
16102ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ 16112ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ 16122ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ 16132ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ 16142ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ 16152ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */ 16162ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */ 16172ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endm 16182ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16192ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro do_yuv_to_rgb_stage2 16202ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v20.4h, v20.4s, #15 16212ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn2 v20.8h, v22.4s, #15 16222ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v24.4h, v24.4s, #14 16232ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn2 v24.8h, v26.4s, #14 16242ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v28.4h, v28.4s, #14 16252ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn2 v28.8h, v30.4s, #14 16262ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v20.8h, v20.8h, v0.8b 16272ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v24.8h, v24.8h, v0.8b 16282ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v28.8h, v28.8h, v0.8b 16292ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.if \bpp != 16 
16302ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v1\g_offs\defsize, v20.8h 16312ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v1\r_offs\defsize, v24.8h 16322ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v1\b_offs\defsize, v28.8h 16332ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.else 16342ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshlu v21.8h, v20.8h, #8 16352ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshlu v25.8h, v24.8h, #8 16362ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshlu v29.8h, v28.8h, #8 16372ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sri v25.8h, v21.8h, #5 16382ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sri v25.8h, v29.8h, #11 16392ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endif 16402ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16412ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endm 16422ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16432ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro do_yuv_to_rgb_stage2_store_load_stage1 16442ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v20.4h, v20.4s, #15 16452ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v24.4h, v24.4s, #14 16462ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn v28.4h, v28.4s, #14 16472ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v4.8b}, [U], 8 16482ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn2 v20.8h, v22.4s, #15 16492ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn2 v24.8h, v26.4s, #14 16502ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org rshrn2 v28.8h, v30.4s, #14 16512ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v5.8b}, [V], 8 
16522ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v20.8h, v20.8h, v0.8b 16532ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v24.8h, v24.8h, v0.8b 16542ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v28.8h, v28.8h, v0.8b 16552ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.if \bpp != 16 /**************** rgb24/rgb32 *********************************/ 16562ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v1\g_offs\defsize, v20.8h 16572ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8b}, [Y], 8 16582ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v1\r_offs\defsize, v24.8h 16592ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [U, #64] 16602ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [V, #64] 16612ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [Y, #64] 16622ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqxtun v1\b_offs\defsize, v28.8h 16632ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ 16642ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ 16652ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ 16662ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ 16672ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ 16682ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ 16692ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ 16702ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v26.4s, 
v8.8h, v1.4h[0] /* multiply by 22971 */ 16712ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.else /**************************** rgb565 ***********************************/ 16722ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshlu v21.8h, v20.8h, #8 16732ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshlu v25.8h, v24.8h, #8 16742ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sqshlu v29.8h, v28.8h, #8 16752ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ 16762ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ 16772ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org ld1 {v0.8b}, [Y], 8 16782ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ 16792ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ 16802ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ 16812ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ 16822ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sri v25.8h, v21.8h, #5 16832ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ 16842ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ 16852ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [U, #64] 16862ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [V, #64] 16872ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org prfm PLDL1KEEP, [Y, #64] 16882ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org sri v25.8h, v29.8h, #11 
16892ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endif 16902ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org do_store \bpp, 8 16912ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */ 16922ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */ 16932ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endm 16942ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 16952ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.macro do_yuv_to_rgb 16962ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org do_yuv_to_rgb_stage1 16972ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org do_yuv_to_rgb_stage2 16982ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org.endm 16992ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org 17002ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org/* Apple gas crashes on adrl, work around that by using adr. 17012ed5319ce40b0ba2cd9b962713ea0ef775781e69rmcilroy@chromium.org * But this requires a copy of these constants for each function. 
*/

/*
 * Constant table read by the function below:
 *   row 0    -> v0.4h : zero padding
 *   row 1    -> v1.4h : fixed-point conversion multipliers
 *   rows 2-3 -> v2.8h : -128 in every lane
 */
.balign 16
jsimd_ycc_\colorid\()_neon_consts:
    .short          0,      0,      0,      0
    .short          22971,  -11277, -23401, 29033
    .short          -128,   -128,   -128,   -128
    .short          -128,   -128,   -128,   -128

/*
 * jsimd_ycc_<colorid>_convert_neon
 *
 * Register arguments, as consumed by the code below:
 *   x0 = OUTPUT_WIDTH  number of pixels converted per row
 *   x1 = INPUT_BUF     pointer to three plane pointers (offsets 0, 8, 16)
 *   x2 = INPUT_ROW     index of the first input row
 *   x3 = OUTPUT_BUF    pointer to an array of output row pointers
 *   x4 = NUM_ROWS      number of rows to convert (nothing is done if < 1)
 *
 * NOTE(review): every argument is used as a full 64-bit X register.  If the
 * C prototype declares any of them as a 32-bit integer, the upper halves are
 * not guaranteed to be zero/sign-extended by the caller -- confirm.
 *
 * Stack frame (336 bytes, sp stays at its base for the whole function):
 *   [sp +   0]  x15 (8 bytes used, 8 bytes padding)
 *   [sp +  16]  low 64 bits of v0 .. v31 (8 groups of 32 bytes)
 *   [sp + 272]  x4, x5, x6, x7, x8, x9, x10, x30
 *
 * The frame is filled through a scratch cursor (x16) instead of
 * post-incrementing sp itself.  Post-incrementing sp would move it back to
 * its entry value and leave the saved registers below the stack pointer,
 * where a signal or interrupt frame may overwrite them while the loops run.
 *
 * NOTE(review): the helper macros used below (do_load, do_store,
 * do_yuv_to_rgb*) are defined outside this excerpt and are assumed not to
 * address memory relative to sp -- confirm.
 */
asm_function jsimd_ycc_\colorid\()_convert_neon
    OUTPUT_WIDTH    .req x0
    INPUT_BUF       .req x1
    INPUT_ROW       .req x2
    OUTPUT_BUF      .req x3
    NUM_ROWS        .req x4

    INPUT_BUF0      .req x5
    INPUT_BUF1      .req x6
    INPUT_BUF2      .req INPUT_BUF

    RGB             .req x7
    Y               .req x8
    U               .req x9
    V               .req x10
    N               .req x15

    /* Allocate the save area and keep it allocated until the epilogue */
    sub             sp, sp, 336
    mov             x16, sp                 /* x16 = cursor into save area */
    str             x15, [x16], 16
    /* x15 is saved, so it can now carry the constants pointer */
    adr             x15, jsimd_ycc_\colorid\()_neon_consts
    /* Save NEON registers (low 64 bits of each) */
    st1             {v0.8b - v3.8b}, [x16], 32
    st1             {v4.8b - v7.8b}, [x16], 32
    st1             {v8.8b - v11.8b}, [x16], 32
    st1             {v12.8b - v15.8b}, [x16], 32
    st1             {v16.8b - v19.8b}, [x16], 32
    st1             {v20.8b - v23.8b}, [x16], 32
    st1             {v24.8b - v27.8b}, [x16], 32
    st1             {v28.8b - v31.8b}, [x16], 32
    /* Load constants: v0.4h (padding), v1.4h (multipliers), v2.8h (-128) */
    ld1             {v0.4h, v1.4h}, [x15], 16
    ld1             {v2.8h}, [x15]

    /* Save general-purpose registers x4-x10 and the link register x30 */
    stp             x4, x5, [x16], 16
    stp             x6, x7, [x16], 16
    stp             x8, x9, [x16], 16
    stp             x10, x30, [x16], 16

    /* Fetch the three input plane pointers; INPUT_BUF2 reuses x1 */
    ldr             INPUT_BUF0, [INPUT_BUF]
    ldr             INPUT_BUF1, [INPUT_BUF, 8]
    ldr             INPUT_BUF2, [INPUT_BUF, 16]
    .unreq          INPUT_BUF

    /* Preset every byte of v10 and v13 to 0xFF */
    movi            v10.16b, #255
    movi            v13.16b, #255

    /* Outer loop over scanlines */
    cmp             NUM_ROWS, #1
    blt             9f
0:
    lsl             x16, INPUT_ROW, #3      /* byte offset of row pointer */
    ldr             Y, [INPUT_BUF0, x16]
    ldr             U, [INPUT_BUF1, x16]
    mov             N, OUTPUT_WIDTH
    ldr             V, [INPUT_BUF2, x16]
    add             INPUT_ROW, INPUT_ROW, #1
    ldr             RGB, [OUTPUT_BUF], #8

    /* Inner loop over pixels, 8 at a time, software pipelined */
    subs            N, N, #8
    blt             3f
    do_load         8
    do_yuv_to_rgb_stage1
    subs            N, N, #8
    blt             2f
1:
    do_yuv_to_rgb_stage2_store_load_stage1
    subs            N, N, #8
    bge             1b
2:
    do_yuv_to_rgb_stage2
    do_store        \bpp, 8
    tst             N, #7
    beq             8f
3:
    /* Tail: load the remaining 4 / 2 / 1 pixels selected by bits of N */
    tst             N, #4
    beq             3f
    do_load         4
3:
    tst             N, #2
    beq             4f
    do_load         2
4:
    tst             N, #1
    beq             5f
    do_load         1
5:
    do_yuv_to_rgb
    /* Tail: store the same 4 / 2 / 1 pixel groups */
    tst             N, #4
    beq             6f
    do_store        \bpp, 4
6:
    tst             N, #2
    beq             7f
    do_store        \bpp, 2
7:
    tst             N, #1
    beq             8f
    do_store        \bpp, 1
8:
    subs            NUM_ROWS, NUM_ROWS, #1
    bgt             0b
9:
    /*
     * Restore all registers and return.  sp still points at the base of the
     * save area, so popping with post-increment only ever frees slots that
     * have already been read and leaves sp at its entry value.
     */
    ldr             x15, [sp], 16
    ld1             {v0.8b - v3.8b}, [sp], 32
    ld1             {v4.8b - v7.8b}, [sp], 32
    ld1             {v8.8b - v11.8b}, [sp], 32
    ld1             {v12.8b - v15.8b}, [sp], 32
    ld1             {v16.8b - v19.8b}, [sp], 32
    ld1             {v20.8b - v23.8b}, [sp], 32
    ld1             {v24.8b - v27.8b}, [sp], 32
    ld1             {v28.8b - v31.8b}, [sp], 32
    /* Restore x4-x10 and the link register x30 */
    ldp             x4, x5, [sp], 16
    ldp             x6, x7, [sp], 16
    ldp             x8, x9, [sp], 16
    ldp             x10, x30, [sp], 16
    ret                                     /* returns through x30 */

    .unreq          OUTPUT_WIDTH
    .unreq          INPUT_ROW
    .unreq          OUTPUT_BUF
    .unreq          NUM_ROWS
    .unreq          INPUT_BUF0
    .unreq          INPUT_BUF1
    .unreq          INPUT_BUF2
    .unreq          RGB
    .unreq          Y
    .unreq          U
    .unreq          V
    .unreq          N

.purgem do_yuv_to_rgb
.purgem do_yuv_to_rgb_stage1
.purgem do_yuv_to_rgb_stage2
.purgem do_yuv_to_rgb_stage2_store_load_stage1
.endm

/* One function and one constant table are emitted per invocation below. */
/*--------------------------------- id ----- bpp R  rsize  G  gsize  B  bsize  defsize */
generate_jsimd_ycc_rgb_convert_neon extrgb,  24, 0, .4h,   1, .4h,   2, .4h,   .8b
generate_jsimd_ycc_rgb_convert_neon extbgr,  24, 2, .4h,   1, .4h,   0, .4h,   .8b
generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h,   1, .4h,   2, .4h,   .8b
generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h,   1, .4h,   0, .4h,   .8b
generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h,   2, .4h,   1, .4h,   .8b
generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h,   2, .4h,   3, .4h,   .8b
generate_jsimd_ycc_rgb_convert_neon rgb565,  16, 0, .4h,   0, .4h,   0, .4h,   .8b

/* The load/store helper macros are no longer needed past this point */
.purgem do_load
.purgem do_store