10c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 20c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This confidential and proprietary software may be used only as 30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// authorised by a licensing agreement from ARM Limited 40c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (C) COPYRIGHT 2004 ARM Limited 50c1bc742181ded4930842b46e9507372f0b1b963James Dong;// ALL RIGHTS RESERVED 60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The entire notice above must be reproduced on all authorised 70c1bc742181ded4930842b46e9507372f0b1b963James Dong;// copies and copies may only be made to the extent permitted 80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// by a licensing agreement from ARM Limited. 90c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 100c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IDCT_s.s 110c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 120c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Inverse DCT module 130c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 140c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 150c1bc742181ded4930842b46e9507372f0b1b963James Dong;// ALGORITHM DESCRIPTION 160c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each 180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// column and then a 1D IDCT for each row. 190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The 8-point 1D IDCT is defined by 210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f(x) = (C(0)*T(0)*c(0,x) + ... 
+ C(7)*T(7)*c(7,x))/2 220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0 240c1bc742181ded4930842b46e9507372f0b1b963James Dong;// c(u,x) = cos( (2x+1)*u*pi/16 ) 250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 260c1bc742181ded4930842b46e9507372f0b1b963James Dong;// We compute the 8-point 1D IDCT using the reverse of 270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the Arai-Agui-Nakajima flow graph which we split into 280c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 5 stages named in reverse order to identify with the 290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// forward DCT. Direct inversion of the forward formulae 300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// in file FDCT_s.s gives: 310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 320c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ] 330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [ A(0) = 2*sqrt(2) 340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// A(u) = 4*cos(u*pi/16) for (u!=0) ] 350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 4: i0 = j0 i1 = j4 370c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i3 = (j2+j6)/2 i2 = (j2-j6)/2 380c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i7 = (j5+j3)/2 i4 = (j5-j3)/2 390c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i5 = (j1+j7)/2 i6 = (j1-j7)/2 400c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2 420c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h2 = (i2*sqrt2)-i3 h3 = i3 430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6 440c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6 
450c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [ The above two lines rotate by -(pi/8) ] 460c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2 470c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2 490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g1 = (h1+h2)/2 g2 = (h1-h2)/2 500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g7 = h7 g6 = h6 - h7 510c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g5 = h5 - g6 g4 = h4 - g5 520c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 530c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2 540c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f1 = (g1+g6)/2 f6 = (g1-g6)/2 550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f2 = (g2+g5)/2 f5 = (g2-g5)/2 560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f3 = (g3+g4)/2 f4 = (g3-g4)/2 570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note that most coefficients are halved 3 times during the 590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// above calculation. We can rescale the algorithm dividing 600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the input by 8 to remove the halvings. 
610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 5: j(u) = T(u)*A(u)/8 630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 4: i0 = j0 i1 = j4 650c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i3 = j2 + j6 i2 = j2 - j6 660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i7 = j5 + j3 i4 = j5 - j3 670c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i5 = j1 + j7 i6 = j1 - j7 680c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 690c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3: h0 = i0 + i1 h1 = i0 - i1 700c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h2 = (i2*sqrt2)-i3 h3 = i3 710c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6) 720c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6) 730c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7 740c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2: g0 = h0 + h3 g3 = h0 - h3 760c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g1 = h1 + h2 g2 = h1 - h2 770c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g7 = h7 g6 = h6 - h7 780c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g5 = h5 - g6 g4 = h4 - g5 790c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1: f0 = g0 + g7 f7 = g0 - g7 810c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f1 = g1 + g6 f6 = g1 - g6 820c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f2 = g2 + g5 f5 = g2 - g5 830c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f3 = g3 + g4 f4 = g3 - g4 840c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 850c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note: 860c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1. 
The scaling by A(u)/8 can often be combined with inverse 870c1bc742181ded4930842b46e9507372f0b1b963James Dong;// quantization. The column and row scalings can be combined. 880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2. The flowgraph in the AAN paper has h4,g6 negated compared 890c1bc742181ded4930842b46e9507372f0b1b963James Dong;// to the above code but is otherwise identical. 900c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 3. The rotation by -pi/8 can be performed using three multiplies 910c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4 920c1bc742181ded4930842b46e9507372f0b1b963James Dong;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6 930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 4. If |T(u)|<=1 then from the IDCT definition, 940c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2 950c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2 960c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2 970c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2) 980c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = (approx)2.64 990c1bc742181ded4930842b46e9507372f0b1b963James Dong;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits. 1000c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The table below shows input patterns generating the maximum 1010c1bc742181ded4930842b46e9507372f0b1b963James Dong;// value of |f(x)| for input in the range |T(u)|<=1. 
M=-1, P=+1 1020c1bc742181ded4930842b46e9507372f0b1b963James Dong;// InputPattern Max |f(x)| 1030c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPPPPPPP |f0| = 2.64 1040c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPPMMMMM |f1| = 2.64 1050c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPMMMPPP |f2| = 2.64 1060c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPMMPPMM |f3| = 2.64 1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMMPPMMP |f4| = 2.64 1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMMPMMPM |f5| = 2.64 1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMPPMPMP |f6| = 2.64 1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMPMPMPM |f7| = 2.64 1110c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note that this input pattern is the transpose of the 1120c1bc742181ded4930842b46e9507372f0b1b963James Dong;// corresponding max input pattern for the FDCT. 1130c1bc742181ded4930842b46e9507372f0b1b963James Dong 1140c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Arguments 1150c1bc742181ded4930842b46e9507372f0b1b963James Dong 1160c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc RN 0 ;// source data buffer 1170c1bc742181ded4930842b46e9507372f0b1b963James DongStride RN 1 ;// destination stride in bytes 1180c1bc742181ded4930842b46e9507372f0b1b963James DongpDest RN 2 ;// destination data buffer 1190c1bc742181ded4930842b46e9507372f0b1b963James DongpScale RN 3 ;// pointer to scaling table 1200c1bc742181ded4930842b46e9507372f0b1b963James Dong 1210c1bc742181ded4930842b46e9507372f0b1b963James Dong 1220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// DCT Inverse Macro 1230c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// The DCT code should be parametrized according 1240c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// to the following inputs: 1250c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255) 
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255) 1270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273) 1280c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 1310c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Inputs: 1320c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pSrc = r0 = Pointer to input data 1330c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Range is -256 to +255 (9-bit) 1340c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Stride = r1 = Stride between input lines 1350c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pDest = r2 = Pointer to output data 1360c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale 1370c1bc742181ded4930842b46e9507372f0b1b963James Dong 1380c1bc742181ded4930842b46e9507372f0b1b963James Dong 1390c1bc742181ded4930842b46e9507372f0b1b963James Dong 1400c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 1410c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT $outsize, $inscale, $stride 1420c1bc742181ded4930842b46e9507372f0b1b963James Dong LCLA SHIFT 1430c1bc742181ded4930842b46e9507372f0b1b963James Dong 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ARM1136JS 1460c1bc742181ded4930842b46e9507372f0b1b963James Dong 1470c1bc742181ded4930842b46e9507372f0b1b963James Dong;// REGISTER ALLOCATION 1480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This is hard since we have 8 values, 9 free registers and each 1490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 
butterfly requires a temporary register. We also want to 1500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// maintain register order so we can use LDM/STM. The table below 1510c1bc742181ded4930842b46e9507372f0b1b963James Dong;// summarises the register allocation that meets all these criteria. 1520c1bc742181ded4930842b46e9507372f0b1b963James Dong;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above. 1530c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1540c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r1 a01 g0 h0 1550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r4 b01 f0 g1 h1 i0 1560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r5 a23 f1 g2 i1 1570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r6 b23 f2 g3 h2 i2 1580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r7 a45 f3 h3 i3 1590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r8 b45 f4 g4 h4 i4 1600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r9 a67 f5 g5 h5 i5 1610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r10 b67 f6 g6 h6 i6 1620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r11 f7 g7 h7 i7 1630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1640c1bc742181ded4930842b46e9507372f0b1b963James Dongra01 RN 1 1650c1bc742181ded4930842b46e9507372f0b1b963James Dongrb01 RN 4 1660c1bc742181ded4930842b46e9507372f0b1b963James Dongra23 RN 5 1670c1bc742181ded4930842b46e9507372f0b1b963James Dongrb23 RN 6 1680c1bc742181ded4930842b46e9507372f0b1b963James Dongra45 RN 7 1690c1bc742181ded4930842b46e9507372f0b1b963James Dongrb45 RN 8 1700c1bc742181ded4930842b46e9507372f0b1b963James Dongra67 RN 9 1710c1bc742181ded4930842b46e9507372f0b1b963James Dongrb67 RN 10 1720c1bc742181ded4930842b46e9507372f0b1b963James Dongrtmp RN 11 1730c1bc742181ded4930842b46e9507372f0b1b963James DongcsPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ] 1740c1bc742181ded4930842b46e9507372f0b1b963James DongLoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ] 
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Transpose allocation 1760c1bc742181ded4930842b46e9507372f0b1b963James Dongxft RN ra01 1770c1bc742181ded4930842b46e9507372f0b1b963James Dongxf0 RN rb01 1780c1bc742181ded4930842b46e9507372f0b1b963James Dongxf1 RN ra23 1790c1bc742181ded4930842b46e9507372f0b1b963James Dongxf2 RN rb23 1800c1bc742181ded4930842b46e9507372f0b1b963James Dongxf3 RN ra45 1810c1bc742181ded4930842b46e9507372f0b1b963James Dongxf4 RN rb45 1820c1bc742181ded4930842b46e9507372f0b1b963James Dongxf5 RN ra67 1830c1bc742181ded4930842b46e9507372f0b1b963James Dongxf6 RN rb67 1840c1bc742181ded4930842b46e9507372f0b1b963James Dongxf7 RN rtmp 1850c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1 allocation 1860c1bc742181ded4930842b46e9507372f0b1b963James Dongxg0 RN xft 1870c1bc742181ded4930842b46e9507372f0b1b963James Dongxg1 RN xf0 1880c1bc742181ded4930842b46e9507372f0b1b963James Dongxg2 RN xf1 1890c1bc742181ded4930842b46e9507372f0b1b963James Dongxg3 RN xf2 1900c1bc742181ded4930842b46e9507372f0b1b963James Dongxgt RN xf3 1910c1bc742181ded4930842b46e9507372f0b1b963James Dongxg4 RN xf4 1920c1bc742181ded4930842b46e9507372f0b1b963James Dongxg5 RN xf5 1930c1bc742181ded4930842b46e9507372f0b1b963James Dongxg6 RN xf6 1940c1bc742181ded4930842b46e9507372f0b1b963James Dongxg7 RN xf7 1950c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2 allocation 1960c1bc742181ded4930842b46e9507372f0b1b963James Dongxh0 RN xg0 1970c1bc742181ded4930842b46e9507372f0b1b963James Dongxh1 RN xg1 1980c1bc742181ded4930842b46e9507372f0b1b963James Dongxht RN xg2 1990c1bc742181ded4930842b46e9507372f0b1b963James Dongxh2 RN xg3 2000c1bc742181ded4930842b46e9507372f0b1b963James Dongxh3 RN xgt 2010c1bc742181ded4930842b46e9507372f0b1b963James Dongxh4 RN xg4 2020c1bc742181ded4930842b46e9507372f0b1b963James Dongxh5 RN xg5 2030c1bc742181ded4930842b46e9507372f0b1b963James Dongxh6 RN xg6 2040c1bc742181ded4930842b46e9507372f0b1b963James Dongxh7 RN xg7 
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3,4 allocation 2060c1bc742181ded4930842b46e9507372f0b1b963James Dongxit RN xh0 2070c1bc742181ded4930842b46e9507372f0b1b963James Dongxi0 RN xh1 2080c1bc742181ded4930842b46e9507372f0b1b963James Dongxi1 RN xht 2090c1bc742181ded4930842b46e9507372f0b1b963James Dongxi2 RN xh2 2100c1bc742181ded4930842b46e9507372f0b1b963James Dongxi3 RN xh3 2110c1bc742181ded4930842b46e9507372f0b1b963James Dongxi4 RN xh4 2120c1bc742181ded4930842b46e9507372f0b1b963James Dongxi5 RN xh5 2130c1bc742181ded4930842b46e9507372f0b1b963James Dongxi6 RN xh6 2140c1bc742181ded4930842b46e9507372f0b1b963James Dongxi7 RN xh7 2150c1bc742181ded4930842b46e9507372f0b1b963James Dong 2160c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR pDest, ppDest 2170c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$stride"="s" 2180c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR Stride, pStride 2190c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2200c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pDest, pBlk 2210c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR csPiBy8, =0x30fc7642 2220c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR LoopRR2, =0x00005a82 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong 2240c1bc742181ded4930842b46e9507372f0b1b963James Dongv6_idct_col$_F 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load even values 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pSrc], #4 ;// j0 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pSrc, #4*16-4] ;// j4 2280c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pSrc, #2*16-4] ;// j2 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pSrc, #6*16-4] ;// j6 2300c1bc742181ded4930842b46e9507372f0b1b963James Dong 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale Even Values 2320c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s16" ;// 16x16 mul 
2330c1bc742181ded4930842b46e9507372f0b1b963James DongSHIFT SETA 12 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pScale], #4 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pScale, #4*16-4] 2360c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pScale, #2*16-4] 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xit, #1<<(SHIFT-1) 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi3, xi0, xi4, xit 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi4, xi0, xi4, xit 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi0, xi1, xi5, xit 2410c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi5, xi1, xi5, xit 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi3, xi3, ASR #SHIFT 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi4, xi3, xi4, LSL #(16-SHIFT) 2440c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pScale, #6*16-4] 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi1, xi2, xi6, xit 2460c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi6, xi2, xi6, xit 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi0, xi0, ASR #SHIFT 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi5, xi0, xi5, LSL #(16-SHIFT) 2490c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi2, xi3, xi7, xit 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi7, xi3, xi7, xit 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi1, xi1, ASR #SHIFT 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi6, xi1, xi6, LSL #(16-SHIFT) 2530c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, ASR #SHIFT 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi7, xi2, xi7, LSL #(16-SHIFT) 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2560c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s32" ;// 32x16 mul 2570c1bc742181ded4930842b46e9507372f0b1b963James DongSHIFT SETA 
(12+8-16) 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xit, #1<<(SHIFT-1) 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pScale], #8 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pScale, #0*32+4-8] 2610c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pScale, #4*32-8] 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pScale, #4*32+4-8] 2630c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi0, xi0, xi4, xit 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi1, xi1, xi4, xit 2650c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi2, xi2, xi5, xit 2660c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi3, xi3, xi5, xit 2670c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi0, xi0, ASR #SHIFT 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi4, xi0, xi1, LSL #(16-SHIFT) 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, ASR #SHIFT 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi5, xi2, xi3, LSL #(16-SHIFT) 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pScale, #2*32-8] 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pScale, #2*32+4-8] 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pScale, #6*32-8] 2740c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pScale, #6*32+4-8] 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi0, xi0, xi6, xit 2760c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi1, xi1, xi6, xit 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi2, xi2, xi7, xit 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi3, xi3, xi7, xit 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi0, xi0, ASR #SHIFT 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi6, xi0, xi1, LSL #(16-SHIFT) 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, ASR #SHIFT 
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi7, xi2, xi3, LSL #(16-SHIFT) 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong 2850c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load odd values 2860c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pSrc, #1*16-4] ;// j1 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pSrc, #7*16-4] ;// j7 2880c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pSrc, #5*16-4] ;// j5 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pSrc, #3*16-4] ;// j3 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong IF {TRUE} 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// shortcut if odd values 0 2930c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQ xi0, #0 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQEQ xi1, #0 2950c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQEQ xi2, #0 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQEQ xi3, #0 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ v6OddZero$_F 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2990c1bc742181ded4930842b46e9507372f0b1b963James Dong 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Store scaled even values 3010c1bc742181ded4930842b46e9507372f0b1b963James Dong STMIA pDest, {xi4, xi5, xi6, xi7} 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale odd values 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s16" 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Perform AAN Scale 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pScale, #1*16-4] 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pScale, #7*16-4] 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pScale, #5*16-4] 
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi7, xi0, xi4, xit 3100c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi0, xi0, xi4, xit 3110c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi4, xi1, xi5, xit 3120c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi1, xi1, xi5, xit 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi7, xi7, ASR #SHIFT 3140c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi0, xi7, xi0, LSL #(16-SHIFT) 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pScale, #3*16-4] 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi5, xi2, xi6, xit 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi2, xi2, xi6, xit 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi4, xi4, ASR #SHIFT 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi1, xi4, xi1, LSL #(16-SHIFT) 3200c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi6, xi3, xi7, xit 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi3, xi3, xi7, xit 3220c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi5, xi5, ASR #SHIFT 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi2, xi5, xi2, LSL #(16-SHIFT) 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi6, xi6, ASR #SHIFT 3250c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi3, xi6, xi3, LSL #(16-SHIFT) 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s32" ;// 32x16 mul 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pScale, #1*32-8] 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pScale, #1*32+4-8] 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pScale, #7*32-8] 3310c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pScale, #7*32+4-8] 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi4, xi4, xi0, xit 
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi5, xi5, xi0, xit 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi6, xi6, xi1, xit 3350c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi7, xi7, xi1, xit 3360c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi4, xi4, ASR #SHIFT 3370c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi0, xi4, xi5, LSL #(16-SHIFT) 3380c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi6, xi6, ASR #SHIFT 3390c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi1, xi6, xi7, LSL #(16-SHIFT) 3400c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pScale, #5*32-8] 3410c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pScale, #5*32+4-8] 3420c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pScale, #3*32-8] 3430c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pScale, #3*32+4-8] 3440c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi4, xi4, xi2, xit 3450c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi5, xi5, xi2, xit 3460c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi6, xi6, xi3, xit 3470c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi7, xi7, xi3, xit 3480c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi4, xi4, ASR #SHIFT 3490c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi2, xi4, xi5, LSL #(16-SHIFT) 3500c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi6, xi6, ASR #SHIFT 3510c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi3, xi6, xi7, LSL #(16-SHIFT) 3520c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 3530c1bc742181ded4930842b46e9507372f0b1b963James Dong 3540c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xit, =0x00010001 ;// rounding constant 3550c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xi5, xi0, xi1 ;// (j1+j7)/2 3560c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xi5, xi5, xit 3570c1bc742181ded4930842b46e9507372f0b1b963James Dong 
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi6, xi0, xi1 ;// j1-j7 3590c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xi7, xi2, xi3 ;// (j5+j3)/2 3600c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xi7, xi7, xit 3610c1bc742181ded4930842b46e9507372f0b1b963James Dong 3620c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi4, xi2, xi3 ;// j5-j3 3630c1bc742181ded4930842b46e9507372f0b1b963James Dong 3640c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 3650c1bc742181ded4930842b46e9507372f0b1b963James Dong 3660c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a 3670c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b 3680c1bc742181ded4930842b46e9507372f0b1b963James Dong 3690c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] 3700c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] 3710c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] 3720c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] 3730c1bc742181ded4930842b46e9507372f0b1b963James Dong 3740c1bc742181ded4930842b46e9507372f0b1b963James Dong SMULBB xi1, xi3, LoopRR2 3750c1bc742181ded4930842b46e9507372f0b1b963James Dong SMULTB xi3, xi3, LoopRR2 3760c1bc742181ded4930842b46e9507372f0b1b963James Dong 3770c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 3780c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 3790c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 3800c1bc742181ded4930842b46e9507372f0b1b963James Dong 3810c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// xi0,xi1,xi2,xi3 now free 3820c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 4,3, rows 2to3 x1/2 
;// v6_idct_col (continued): finishes h5, computes the even half (rows 0 to 3),
;// runs IStage 2 and IStage 1, then transposes and stores two columns and
;// loops.  v6OddZero$_F further down is a shorter variant of the same tail.

        ;// (product << 1) in the high lane and (product >> 15) in the low lane:
        ;// both lanes end up holding bits [30:15] of their 32-bit product.
        MOV         xi3, xi3, LSL #1
        PKHTB       xh5, xi3, xi1, ASR#15       ;// h5/4
        ;// LDRD fills xi0 and its pair register from pDest+8 / pDest+12.
        ;// NOTE(review): the code below reads xi1 as j6, so xi1 is presumably
        ;// the pair partner of xi0 - the RN aliases are outside this view.
        LDRD        xi0, [pDest, #8]            ;// j2,j6 scaled

        ;// IStage 2, rows4to7
        SSUB16      xg6, xh6, xh7
        SSUB16      xg5, xh5, xg6
        SSUB16      xg4, xh4, xg5

        SSUB16      xi2, xi0, xi1               ;// (j2-j6)

        SHADD16     xi3, xi0, xi1               ;// (j2+j6)/2

        ;// Scale (j2-j6) in both lanes by LoopRR2[15:0], then repack bits
        ;// [30:15] of each product, as done for h5 above.
        SMULBB      xi0, xi2, LoopRR2
        SMULTB      xi2, xi2, LoopRR2

        MOV         xi2, xi2, LSL #1
        PKHTB       xh2, xi2, xi0, ASR#15       ;// i2*sqrt(2)/4

        ;// xi0, xi1 now free
        ;// IStage 4,3 rows 0to1 x 1/2
        LDRD        xi0, [pDest]                ;// j0, j4 scaled
        SSUB16      xh2, xh2, xi3
        ;// Advance the counter kept in the top bits of LoopRR2.  The carry set
        ;// here is consumed by the BCC after the stores; nothing in between
        ;// writes the NZCV flags (no S-suffixed instruction).
        ADDS        LoopRR2, LoopRR2, #2<<29    ;// done two rows

        SHADD16     xh0, xi0, xi1
        SHSUB16     xh1, xi0, xi1

        ;// IStage 2 rows 0to3 x 1/2
        ;// NOTE(review): xh3 is not written in the visible code; presumably it
        ;// aliases xi3 ((j2+j6)/2) - confirm against the register aliases.
        SHSUB16     xg2, xh1, xh2
        SHADD16     xg1, xh1, xh2
        SHSUB16     xg3, xh0, xh3
        SHADD16     xg0, xh0, xh3

        ;// IStage 1 all rows
        SADD16      xf3, xg3, xg4
        SSUB16      xf4, xg3, xg4
        SADD16      xf2, xg2, xg5
        SSUB16      xf5, xg2, xg5
        SADD16      xf1, xg1, xg6
        SSUB16      xf6, xg1, xg6
        SADD16      xf0, xg0, xg7
        SSUB16      xf7, xg0, xg7

        ;// Transpose, store and loop
        ;// PKHBT gathers the low lanes of a register pair (ra..), PKHTB the
        ;// high lanes (rb..).  Each STMIA writes four words and advances pDest.
        PKHBT       ra01, xf0, xf1, LSL #16
        PKHTB       rb01, xf1, xf0, ASR #16

        PKHBT       ra23, xf2, xf3, LSL #16
        PKHTB       rb23, xf3, xf2, ASR #16

        PKHBT       ra45, xf4, xf5, LSL #16
        PKHTB       rb45, xf5, xf4, ASR #16

        PKHBT       ra67, xf6, xf7, LSL #16
        STMIA       pDest!, {ra01, ra23, ra45, ra67}
        PKHTB       rb67, xf7, xf6, ASR #16
        STMIA       pDest!, {rb01, rb23, rb45, rb67}
        BCC         v6_idct_col$_F              ;// loop until the ADDS above carries out

        ;// Column pass finished: step back 64*2 bytes from the advanced pDest
        ;// and use that address as the row-pass source, then reload pDest
        ;// (and pScale when the stride is a run-time value).
        ;// NOTE(review): M_LDR, ppDest and pStride are defined elsewhere.
        SUB         pSrc, pDest, #(64*2)
        M_LDR       pDest, ppDest
        IF "$stride"="s"
        M_LDR       pScale, pStride
        ENDIF
        B           v6_idct_row$_F

;// Column tail without odd-part arithmetic: IStage 1 below is a plain copy
;// of the even-part results.  xi6/xi7 are read as j2/j6, xi4/xi5 as j0/j4.
;// NOTE(review): presumably entered when the odd-index inputs of both columns
;// are zero; the test and the branch to this label are outside this view.
v6OddZero$_F
        SSUB16      xi2, xi6, xi7               ;// (j2-j6)
        SHADD16     xi3, xi6, xi7               ;// (j2+j6)/2

        SMULBB      xi0, xi2, LoopRR2
        SMULTB      xi2, xi2, LoopRR2

        MOV         xi2, xi2, LSL #1
        PKHTB       xh2, xi2, xi0, ASR#15       ;// i2*sqrt(2)/4
        SSUB16      xh2, xh2, xi3

        ;// xi0, xi1 now free
        ;// IStage 4,3 rows 0to1 x 1/2

        SHADD16     xh0, xi4, xi5
        SHSUB16     xh1, xi4, xi5

        ;// IStage 2 rows 0to3 x 1/2
        SHSUB16     xg2, xh1, xh2
        SHADD16     xg1, xh1, xh2
        SHSUB16     xg3, xh0, xh3
        SHADD16     xg0, xh0, xh3

        ;// IStage 1 all rows
        ;// No g4..g7 terms here, so each sum/difference pair is the same value.
        MOV         xf3, xg3
        MOV         xf4, xg3
        MOV         xf2, xg2
        MOV         xf5, xg2
        MOV         xf1, xg1
        MOV         xf6, xg1
        MOV         xf0, xg0
        MOV         xf7, xg0

        ;// Transpose
        PKHBT       ra01, xf0, xf1, LSL #16
        PKHTB       rb01, xf1, xf0, ASR #16

        PKHBT       ra23, xf2, xf3, LSL #16
        PKHTB       rb23, xf3, xf2, ASR #16

        PKHBT       ra45, xf4, xf5, LSL #16
;// Tail of v6OddZero$_F (finish the transpose, store, loop) followed by the
;// entry of the row pass v6_idct_row$_F.
        PKHTB       rb45, xf5, xf4, ASR #16

        PKHBT       ra67, xf6, xf7, LSL #16
        PKHTB       rb67, xf7, xf6, ASR #16

        STMIA       pDest!, {ra01, ra23, ra45, ra67}
        ;// Counter lives in the top bits of LoopRR2; STMIA does not touch the
        ;// flags, so the carry is still valid at the BCC.
        ADDS        LoopRR2, LoopRR2, #2<<29    ;// done two rows
        STMIA       pDest!, {rb01, rb23, rb45, rb67}

        BCC         v6_idct_col$_F
        ;// Column pass finished: row-pass source = advanced pDest - 64*2 bytes,
        ;// then reload pDest (and pScale when the stride is a run-time value).
        SUB         pSrc, pDest, #(64*2)
        M_LDR       pDest, ppDest
        IF "$stride"="s"
        M_LDR       pScale, pStride
        ENDIF
        ;// No branch here: execution falls through into the row pass.


;// Row pass.  Each LDR fetches two adjacent 16-bit values from one of eight
;// slots spaced 16 bytes apart (offset n*16 for jn), so two lanes are
;// processed per iteration.
v6_idct_row$_F
        ;// IStage 4,3, rows4to7 x1/4
        LDR         xit, =0x00010001            ;// rounding constant
        LDR         xi0, [pSrc, #1*16]          ;// j1
        LDR         xi1, [pSrc, #7*16]          ;// 4*j7
        LDR         xi2, [pSrc, #5*16]          ;// j5
        LDR         xi3, [pSrc, #3*16]          ;// j3

        ;// SHADD16 with xit computes (x+1)>>1 per lane, i.e. a rounded halving.
        SHADD16     xi1, xi1, xit               ;// 2*j7
;// v6_idct_row (continued) - IStage 4 and IStage 3 for the odd inputs
;// j1, j3, j5, j7.  xit holds 0x00010001 (loaded at the row-pass entry), so
;// SHADD16 with xit is a rounded halving of each 16-bit lane.
        SHADD16     xi1, xi1, xit               ;// j7

        SHADD16     xi5, xi0, xi1               ;// (j1+j7)/2
        SSUB16      xi6, xi0, xi1               ;// j1-j7
        SHADD16     xi7, xi2, xi3               ;// (j5+j3)/2
        SSUB16      xi4, xi2, xi3               ;// j5-j3

        SSUB16      xi3, xi5, xi7               ;// (i5-i7)/2

        ;// Regroup per lane: top halfword from xi4, bottom halfword from xi6.
        PKHBT       xi0, xi6, xi4, LSL#16       ;// [i4,i6] row a
        PKHTB       xi1, xi4, xi6, ASR#16       ;// [i4,i6] row b

        ;// Dual 16x16 multiplies against the packed constant csPiBy8.
        ;// SMUADX = bottom*top + top*bottom, SMUSD = bottom*bottom - top*top.
        ;// NOTE(review): csPiBy8 is loaded outside this view; the [c,s]
        ;// layout is taken from the existing comments.
        SMUADX      xi2, xi0, csPiBy8           ;// rowa by [c,s]
        SMUADX      xi4, xi1, csPiBy8           ;// rowb by [c,s]
        SMUSD       xi0, xi0, csPiBy8           ;// rowa by [-s,c]
        SMUSD       xi6, xi1, csPiBy8           ;// rowb by [-s,c]

        ;// Multiply both lanes of (i5-i7)/2 by the low halfword of LoopRR2.
        SMULBB      xi1, xi3, LoopRR2           ;// low lane  * LoopRR2[15:0]
        SMULTB      xi3, xi3, LoopRR2           ;// high lane * LoopRR2[15:0]

        ;// Keep the top 16 bits of each 32-bit product (row b high, row a low).
        PKHTB       xh4, xi4, xi2, ASR#16       ;// h4/4
        PKHTB       xh6, xi6, xi0, ASR#16       ;// h6/4
        SHADD16     xh7, xi5, xi7               ;// (i5+i7)/4

        ;// Pre-shift the high-lane product; the PKHTB that follows packs it
        ;// together with the low-lane product shifted right by 15.
        MOV         xi3, xi3, LSL #1
;// v6_idct_row (continued): pack h5, finish IStage 2 for rows 4 to 7 and
;// compute the j2/j6 terms.
        ;// High lane from xi3 (already shifted left by 1), low lane from
        ;// xi1 >> 15: both lanes hold bits [30:15] of their product.
        PKHTB       xh5, xi3, xi1, ASR#15       ;// h5/4

        ;// xi0,xi1,xi2,xi3 now free
        ;// IStage 4,3, rows 2to3 x1/2

        LDR         xi0, [pSrc, #2*16]          ;// j2
        LDR         xi1, [pSrc, #6*16]          ;// 2*j6

        ;// IStage 2, rows4to7
        SSUB16      xg6, xh6, xh7
        SSUB16      xg5, xh5, xg6
        SSUB16      xg4, xh4, xg5

        ;// Halving add with xit (the rounding constant loaded at row entry).
        SHADD16     xi1, xi1, xit               ;// j6
        SSUB16      xi2, xi0, xi1               ;// (j2-j6)
        SHADD16     xi3, xi0, xi1               ;// (j2+j6)/2

        ;// Scale (j2-j6) in both lanes by LoopRR2[15:0]; repack bits [30:15]
        ;// of each product into one register.
        SMULBB      xi0, xi2, LoopRR2
        SMULTB      xi2, xi2, LoopRR2

        MOV         xi2, xi2, LSL #1

        PKHTB       xh2, xi2, xi0, ASR#15       ;// i2*sqrt(2)/4

        ;// xi0, xi1 now free
        ;// IStage 4,3 rows 0to1 x 1/2
        LDR         xi1, [pSrc, #4*16]          ;// j4
;// v6_idct_row (continued): DC term, IStage 2 and IStage 1, optional
;// saturation, transpose/pack and store of two output rows, then loop.
;// Ends the ARM1136JS section and opens the CortexA8 section.
        ;// Post-increment by 4 bytes: the next iteration reads the next two
        ;// 16-bit lanes.
        LDR         xi0, [pSrc], #4             ;// j0

        SSUB16      xh2, xh2, xi3
        ;// Counter lives in the top bits of LoopRR2.  The carry produced here
        ;// feeds the BCC at the end of the loop; nothing in between carries
        ;// an S suffix, so NZCV is preserved.
        ADDS        LoopRR2, LoopRR2, #2<<29    ;// done two rows

        ;// xit is 0x00010001, so this adds 4 to each 16-bit lane of j0.
        ADD         xi0, xi0, xit, LSL #2       ;// ensure correct round
        SHADD16     xh0, xi0, xi1               ;// of DC result
        SHSUB16     xh1, xi0, xi1

        ;// IStage 2 rows 0to3 x 1/2
        ;// NOTE(review): xh3 is not written in the visible code; presumably it
        ;// aliases xi3 ((j2+j6)/2) - confirm against the register aliases.
        SHSUB16     xg2, xh1, xh2
        SHADD16     xg1, xh1, xh2
        SHSUB16     xg3, xh0, xh3
        SHADD16     xg0, xh0, xh3

        ;// IStage 1 all rows
        ;// Halving forms here (the column pass uses SADD16/SSUB16).
        SHADD16     xf3, xg3, xg4
        SHSUB16     xf4, xg3, xg4
        SHADD16     xf2, xg2, xg5
        SHSUB16     xf5, xg2, xg5
        SHADD16     xf1, xg1, xg6
        SHSUB16     xf6, xg1, xg6
        SHADD16     xf0, xg0, xg7
        SHSUB16     xf7, xg0, xg7

        ;// Saturate
        ;// u8: clamp each lane to 0..255.  s9: clamp each lane to -256..255.
        ;// Other $outsize values are left unsaturated.
        IF ("$outsize"="u8")
        USAT16      xf0, #8, xf0
        USAT16      xf1, #8, xf1
        USAT16      xf2, #8, xf2
        USAT16      xf3, #8, xf3
        USAT16      xf4, #8, xf4
        USAT16      xf5, #8, xf5
        USAT16      xf6, #8, xf6
        USAT16      xf7, #8, xf7
        ENDIF
        IF ("$outsize"="s9")
        SSAT16      xf0, #9, xf0
        SSAT16      xf1, #9, xf1
        SSAT16      xf2, #9, xf2
        SSAT16      xf3, #9, xf3
        SSAT16      xf4, #9, xf4
        SSAT16      xf5, #9, xf5
        SSAT16      xf6, #9, xf6
        SSAT16      xf7, #9, xf7
        ENDIF

        ;// Transpose to Row, Pack and store
        IF ("$outsize"="u8")
        ;// After USAT16 each lane fits in one byte, so shifting the odd
        ;// register left by 8 interleaves the bytes of two results.
        ORR         xf0, xf0, xf1, LSL #8       ;// [ b1 b0 a1 a0 ]
        ORR         xf2, xf2, xf3, LSL #8       ;// [ b3 b2 a3 a2 ]
        ORR         xf4, xf4, xf5, LSL #8       ;// [ b5 b4 a5 a4 ]
        ORR         xf6, xf6, xf7, LSL #8       ;// [ b7 b6 a7 a6 ]
        ;// Low halfwords form row a, high halfwords form row b.
        PKHBT       ra01, xf0, xf2, LSL #16
        PKHTB       rb01, xf2, xf0, ASR #16
        PKHBT       ra23, xf4, xf6, LSL #16
        PKHTB       rb23, xf6, xf4, ASR #16
        ;// Two words (8 bytes) per row; pDest advances one stride per row.
        STMIA       pDest, {ra01, ra23}
        IF "$stride"="s"
        ADD         pDest, pDest, pScale        ;// run-time stride held in pScale
        STMIA       pDest, {rb01, rb23}
        ADD         pDest, pDest, pScale
        ELSE
        ADD         pDest, pDest, #($stride)    ;// assembly-time constant stride
        STMIA       pDest, {rb01, rb23}
        ADD         pDest, pDest, #($stride)
        ENDIF
        ENDIF
        IF ("$outsize"="s9"):LOR:("$outsize"="s16")
        ;// 16-bit outputs: gather the low lanes into row a and the high
        ;// lanes into row b.
        PKHBT       ra01, xf0, xf1, LSL #16
        PKHTB       rb01, xf1, xf0, ASR #16

        PKHBT       ra23, xf2, xf3, LSL #16
        PKHTB       rb23, xf3, xf2, ASR #16

        PKHBT       ra45, xf4, xf5, LSL #16
        PKHTB       rb45, xf5, xf4, ASR #16

        PKHBT       ra67, xf6, xf7, LSL #16
        PKHTB       rb67, xf7, xf6, ASR #16

        ;// Four words (16 bytes) per row; pDest advances one stride per row.
        STMIA       pDest, {ra01, ra23, ra45, ra67}
        IF "$stride"="s"
        ADD         pDest, pDest, pScale        ;// run-time stride held in pScale
        STMIA       pDest, {rb01, rb23, rb45, rb67}
        ADD         pDest, pDest, pScale
        ELSE
        ADD         pDest, pDest, #($stride)    ;// assembly-time constant stride
        STMIA       pDest, {rb01, rb23, rb45, rb67}
        ADD         pDest, pDest, #($stride)
        ENDIF
        ENDIF

        BCC         v6_idct_row$_F              ;// loop until the ADDS above carries out
        ENDIF ;// ARM1136JS


        IF CortexA8

;// Q-register numbers used for the eight source rows and one temporary.
;// The QN/DN aliases that follow are built from these numbers.
Src0        EQU  7
Src1        EQU  8
Src2        EQU  9
Src3        EQU  10
Src4        EQU  11
Src5        EQU  12
Src6        EQU  13
Src7        EQU  14
;// CortexA8 register aliases.  Each IDCT stage (j -> i -> h -> g -> f) gets
;// its own set of names, but the names are only re-labellings of the same
;// nine Q registers (Src0..Src7 = Q7..Q14, Tmp = Q15).  A stage output may
;// therefore share a register with an input of the same stage, so the
;// instruction order in the code that uses these aliases matters.
;// The trailing comments give the physical register each alias resolves to.
Tmp         EQU  15

;// Stage j: source rows, 16-bit signed lanes.
qXj0        QN Src0.S16                 ;// Q7
qXj1        QN Src1.S16                 ;// Q8
qXj2        QN Src2.S16                 ;// Q9
qXj3        QN Src3.S16                 ;// Q10
qXj4        QN Src4.S16                 ;// Q11
qXj5        QN Src5.S16                 ;// Q12
qXj6        QN Src6.S16                 ;// Q13
qXj7        QN Src7.S16                 ;// Q14
qXjt        QN Tmp.S16                  ;// Q15

;// D-register halves of the above: lo = D(2n), hi = D(2n+1) for Qn.
dXj0lo      DN (Src0*2).S16
dXj0hi      DN (Src0*2+1).S16
dXj1lo      DN (Src1*2).S16
dXj1hi      DN (Src1*2+1).S16
dXj2lo      DN (Src2*2).S16
dXj2hi      DN (Src2*2+1).S16
dXj3lo      DN (Src3*2).S16
dXj3hi      DN (Src3*2+1).S16
dXj4lo      DN (Src4*2).S16
dXj4hi      DN (Src4*2+1).S16
dXj5lo      DN (Src5*2).S16
dXj5hi      DN (Src5*2+1).S16
dXj6lo      DN (Src6*2).S16
dXj6hi      DN (Src6*2+1).S16
dXj7lo      DN (Src7*2).S16
dXj7hi      DN (Src7*2+1).S16
dXjtlo      DN (Tmp*2).S16
dXjthi      DN (Tmp*2+1).S16

;// Stage i, expressed in terms of the stage j registers.
qXi0        QN qXj0                     ;// Q7
qXi1        QN qXj4                     ;// Q11
qXi2        QN qXj2                     ;// Q9
qXi3        QN qXj7                     ;// Q14
qXi4        QN qXj5                     ;// Q12
qXi5        QN qXjt                     ;// Q15
qXi6        QN qXj1                     ;// Q8
qXi7        QN qXj6                     ;// Q13
qXit        QN qXj3                     ;// Q10

dXi0lo      DN dXj0lo
dXi0hi      DN dXj0hi
dXi1lo      DN dXj4lo
dXi1hi      DN dXj4hi
dXi2lo      DN dXj2lo
dXi2hi      DN dXj2hi
dXi3lo      DN dXj7lo
dXi3hi      DN dXj7hi
dXi4lo      DN dXj5lo
dXi4hi      DN dXj5hi
dXi5lo      DN dXjtlo
dXi5hi      DN dXjthi
dXi6lo      DN dXj1lo
dXi6hi      DN dXj1hi
dXi7lo      DN dXj6lo
dXi7hi      DN dXj6hi
dXitlo      DN dXj3lo
dXithi      DN dXj3hi

;// Stage h, expressed in terms of the stage i registers.
qXh0        QN qXit                     ;// Q10
qXh1        QN qXi0                     ;// Q7
qXh2        QN qXi2                     ;// Q9
qXh3        QN qXi3                     ;// Q14
qXh4        QN qXi7                     ;// Q13
qXh5        QN qXi5                     ;// Q15
qXh6        QN qXi4                     ;// Q12
qXh7        QN qXi1                     ;// Q11
qXht        QN qXi6                     ;// Q8

dXh0lo      DN dXitlo
dXh0hi      DN dXithi
dXh1lo      DN dXi0lo
dXh1hi      DN dXi0hi
dXh2lo      DN dXi2lo
dXh2hi      DN dXi2hi
dXh3lo      DN dXi3lo
dXh3hi      DN dXi3hi
dXh4lo      DN dXi7lo
dXh4hi      DN dXi7hi
dXh5lo      DN dXi5lo
dXh5hi      DN dXi5hi
dXh6lo      DN dXi4lo
dXh6hi      DN dXi4hi
dXh7lo      DN dXi1lo
dXh7hi      DN dXi1hi
dXhtlo      DN dXi6lo
dXhthi      DN dXi6hi

;// Stage g, expressed in terms of the stage h registers.
qXg0        QN qXh2                     ;// Q9
qXg1        QN qXht                     ;// Q8
qXg2        QN qXh1                     ;// Q7
qXg3        QN qXh0                     ;// Q10
qXg4        QN qXh4                     ;// Q13
qXg5        QN qXh5                     ;// Q15
qXg6        QN qXh6                     ;// Q12
qXg7        QN qXh7                     ;// Q11
qXgt        QN qXh3                     ;// Q14

;// Stage f (1D IDCT outputs), expressed in terms of the stage g registers.
qXf0        QN qXg6                     ;// Q12
qXf1        QN qXg5                     ;// Q15
qXf2        QN qXg4                     ;// Q13
qXf3        QN qXgt                     ;// Q14
qXf4        QN qXg3                     ;// Q10
qXf5        QN qXg2                     ;// Q7
qXf6        QN qXg1                     ;// Q8
qXf7        QN qXg0                     ;// Q9
qXft        QN qXg7                     ;// Q11


;// 32-bit scratch views.  qXt0/qT0lo both name Q1 and qXt1/qT0hi both name
;// Q2, so those pairs cannot be live at the same time.
qXt0        QN 1.S32
qXt1        QN 2.S32
qT0lo       QN 1.S32
qT0hi       QN 2.S32
qT1lo       QN 3.S32
;// Remaining CortexA8 aliases (scratch, scale and coefficient registers),
;// the coefficient-table import and the first source load.
qT1hi       QN 4.S32
;// qScalelo/qTemp0 both name Q5 and qScalehi/qTemp1 both name Q6.
qScalelo    QN 5.S32        ;// used to read post scale values
qScalehi    QN 6.S32
qTemp0      QN 5.S32
qTemp1      QN 6.S32


;// 16-bit views for scale values.  Scale1 is Q6 (same register as
;// qScalehi/qTemp1); Scale2 is Q15 (the register numbered by Tmp).
Scale1      EQU 6
Scale2      EQU 15
qScale1     QN Scale1.S16
qScale2     QN Scale2.S16
dScale1lo   DN (Scale1*2).S16
dScale1hi   DN (Scale1*2+1).S16
dScale2lo   DN (Scale2*2).S16
dScale2hi   DN (Scale2*2+1).S16

;// D0 holds the multiplier constants; the scalar aliases below select
;// individual 16-bit lanes of it.  The format comment lists lane 3 first.
dCoefs      DN 0.S16        ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
InvSqrt2    DN dCoefs[0]    ;// 1/sqrt(2) in Q15
S           DN dCoefs[1]    ;// Sin(PI/8) in Q15
C           DN dCoefs[2]    ;// Cos(PI/8) in Q15

pTemp       RN 12


        ;// NOTE(review): armCOMM_IDCTCoef is not referenced in the visible
        ;// code; presumably it is the table loaded into dCoefs by the
        ;// prescale macros - confirm there.
        IMPORT  armCOMM_IDCTCoef

        ;// Load the first two source rows (32 bytes) and advance pSrc.
        ;// @64 asserts that pSrc is 64-bit aligned.
        VLD1        {qXj0,qXj1}, [pSrc @64]!
;// CortexA8 first 1D pass: load the remaining six source rows, apply the
;// prescale macro (IStage 4), then IStage 3, 2 and 1 on eight columns at once
;// (one 16-bit lane per column).  The stage aliases overlap physically, so
;// each result is written only after the value previously held in that
;// register has been consumed - do not reorder these instructions.
        VLD1        {qXj2,qXj3}, [pSrc @64]!
        VLD1        {qXj4,qXj5}, [pSrc @64]!
        VLD1        {qXj6,qXj7}, [pSrc @64]!

        ;// Load PreScale and multiply with Src
        ;// IStage 4
        ;// NOTE(review): both macros are defined elsewhere; they are expected
        ;// to leave the stage-i values in qXi0..qXi7 - confirm.

        IF "$inscale"="s16"                         ;// 16X16 Mul
            M_IDCT_PRESCALE16
        ENDIF

        IF "$inscale"="s32"                         ;// 32X32 Mul
            M_IDCT_PRESCALE32
        ENDIF

        ;// IStage 3
        ;// VQDMULH by a Q15 scalar is a saturating (a*b*2)>>16 per lane.
        VQDMULH     qXi2, qXi2, InvSqrt2            ;// i2/sqrt(2)
        VHADD       qXh0, qXi0, qXi1                ;// (i0+i1)/2
        VHSUB       qXh1, qXi0, qXi1                ;// (i0-i1)/2
        VHADD       qXh7, qXi5, qXi7                ;// (i5+i7)/4
        VSUB        qXh5, qXi5, qXi7                ;// (i5-i7)/2
        VQDMULH     qXh5, qXh5, InvSqrt2            ;// h5/sqrt(2)
        VSUB        qXh2, qXi2, qXi3                ;// h2, h3

        ;// pi/8 rotation of (i4, i6): widen to 32 bits, accumulate, then
        ;// narrow with a truncating right shift by 16.  Low and high halves of
        ;// each Q register are handled separately.
        VMULL       qXt0, dXi4lo, C                 ;// c*i4
        VMLAL       qXt0, dXi6lo, S                 ;// c*i4+s*i6
        VMULL       qXt1, dXi4hi, C
        VMLAL       qXt1, dXi6hi, S
        VSHRN       dXh4lo, qXt0, #16               ;// h4
        VSHRN       dXh4hi, qXt1, #16

        VMULL       qXt0, dXi6lo, C                 ;// c*i6
        VMLSL       qXt0, dXi4lo, S                 ;// -s*i4 + c*i6
        VMULL       qXt1, dXi6hi, C
        VMLSL       qXt1, dXi4hi, S
        VSHRN       dXh6lo, qXt0, #16               ;// h6
        VSHRN       dXh6hi, qXt1, #16

        ;// IStage 2
        VSUB        qXg6, qXh6, qXh7
        VSUB        qXg5, qXh5, qXg6
        VSUB        qXg4, qXh4, qXg5
        VHADD       qXg1, qXh1, qXh2                ;// (h1+h2)/2
        VHSUB       qXg2, qXh1, qXh2                ;// (h1-h2)/2
        VHADD       qXg0, qXh0, qXh3                ;// (h0+h3)/2
        VHSUB       qXg3, qXh0, qXh3                ;// (h0-h3)/2

        ;// IStage 1 all rows
        VADD        qXf3, qXg3, qXg4
        VSUB        qXf4, qXg3, qXg4
        VADD        qXf2, qXg2, qXg5
        VSUB        qXf5, qXg2, qXg5
        VADD        qXf1, qXg1, qXg6
        VSUB        qXf6, qXg1, qXg6
        VADD        qXf0, qXg0, qXg7
        VSUB        qXf7, qXg0, qXg7

        ;// Transpose, store and loop
        ;// XTRn is the Q-register number that qXfn resolves to through the
        ;// alias chain (f0=Q12, f1=Q15, f2=Q13, f3=Q14, f4=Q10, f5=Q7, f6=Q8,
        ;// f7=Q9, ft=Q11).
XTR0        EQU Src5
XTR1        EQU Tmp
XTR2        EQU Src6
XTR3        EQU Src7
XTR4        EQU Src3
XTR5        EQU Src0
XTR6        EQU Src1
XTR7        EQU Src2
XTRt        EQU Src4

;// 32-bit views of f0..f7, so that VTRN on these names transposes 32-bit
;// elements instead of 16-bit ones.
qA0         QN  XTR0.S32                            ;// for transpose
qA1         QN  XTR1.S32
qA2         QN  XTR2.S32
qA3         QN  XTR3.S32
qA4         QN  XTR4.S32
qA5         QN  XTR5.S32
qA6         QN  XTR6.S32
qA7         QN  XTR7.S32

;// 64-bit halves exchanged by VSWP: dB0..dB3 are the HIGH halves of f0..f3.
dB0         DN  XTR0*2+1                            ;// for using VSWP
dB1         DN  XTR1*2+1
dB2         DN  XTR2*2+1
8990c1bc742181ded4930842b46e9507372f0b1b963James DongdB3 DN XTR3*2+1 9000c1bc742181ded4930842b46e9507372f0b1b963James DongdB4 DN XTR4*2 9010c1bc742181ded4930842b46e9507372f0b1b963James DongdB5 DN XTR5*2 9020c1bc742181ded4930842b46e9507372f0b1b963James DongdB6 DN XTR6*2 9030c1bc742181ded4930842b46e9507372f0b1b963James DongdB7 DN XTR7*2 9040c1bc742181ded4930842b46e9507372f0b1b963James Dong 9050c1bc742181ded4930842b46e9507372f0b1b963James Dong 9060c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qXf0, qXf1 9070c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qXf2, qXf3 9080c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qXf4, qXf5 9090c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qXf6, qXf7 9100c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qA0, qA2 9110c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qA1, qA3 9120c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qA4, qA6 9130c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qA5, qA7 9140c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dB0, dB4 9150c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dB1, dB5 9160c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dB2, dB6 9170c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dB3, dB7 9180c1bc742181ded4930842b46e9507372f0b1b963James Dong 9190c1bc742181ded4930842b46e9507372f0b1b963James Dong 9200c1bc742181ded4930842b46e9507372f0b1b963James DongqYj0 QN qXf0 9210c1bc742181ded4930842b46e9507372f0b1b963James DongqYj1 QN qXf1 9220c1bc742181ded4930842b46e9507372f0b1b963James DongqYj2 QN qXf2 9230c1bc742181ded4930842b46e9507372f0b1b963James DongqYj3 QN qXf3 9240c1bc742181ded4930842b46e9507372f0b1b963James DongqYj4 QN qXf4 9250c1bc742181ded4930842b46e9507372f0b1b963James DongqYj5 QN qXf5 9260c1bc742181ded4930842b46e9507372f0b1b963James DongqYj6 QN qXf6 9270c1bc742181ded4930842b46e9507372f0b1b963James DongqYj7 QN qXf7 9280c1bc742181ded4930842b46e9507372f0b1b963James DongqYjt QN qXft 
9290c1bc742181ded4930842b46e9507372f0b1b963James Dong 9300c1bc742181ded4930842b46e9507372f0b1b963James DongdYj0lo DN (XTR0*2).S16 9310c1bc742181ded4930842b46e9507372f0b1b963James DongdYj0hi DN (XTR0*2+1).S16 9320c1bc742181ded4930842b46e9507372f0b1b963James DongdYj1lo DN (XTR1*2).S16 9330c1bc742181ded4930842b46e9507372f0b1b963James DongdYj1hi DN (XTR1*2+1).S16 9340c1bc742181ded4930842b46e9507372f0b1b963James DongdYj2lo DN (XTR2*2).S16 9350c1bc742181ded4930842b46e9507372f0b1b963James DongdYj2hi DN (XTR2*2+1).S16 9360c1bc742181ded4930842b46e9507372f0b1b963James DongdYj3lo DN (XTR3*2).S16 9370c1bc742181ded4930842b46e9507372f0b1b963James DongdYj3hi DN (XTR3*2+1).S16 9380c1bc742181ded4930842b46e9507372f0b1b963James DongdYj4lo DN (XTR4*2).S16 9390c1bc742181ded4930842b46e9507372f0b1b963James DongdYj4hi DN (XTR4*2+1).S16 9400c1bc742181ded4930842b46e9507372f0b1b963James DongdYj5lo DN (XTR5*2).S16 9410c1bc742181ded4930842b46e9507372f0b1b963James DongdYj5hi DN (XTR5*2+1).S16 9420c1bc742181ded4930842b46e9507372f0b1b963James DongdYj6lo DN (XTR6*2).S16 9430c1bc742181ded4930842b46e9507372f0b1b963James DongdYj6hi DN (XTR6*2+1).S16 9440c1bc742181ded4930842b46e9507372f0b1b963James DongdYj7lo DN (XTR7*2).S16 9450c1bc742181ded4930842b46e9507372f0b1b963James DongdYj7hi DN (XTR7*2+1).S16 9460c1bc742181ded4930842b46e9507372f0b1b963James DongdYjtlo DN (XTRt*2).S16 9470c1bc742181ded4930842b46e9507372f0b1b963James DongdYjthi DN (XTRt*2+1).S16 9480c1bc742181ded4930842b46e9507372f0b1b963James Dong 9490c1bc742181ded4930842b46e9507372f0b1b963James DongqYi0 QN qYj0 9500c1bc742181ded4930842b46e9507372f0b1b963James DongqYi1 QN qYj4 9510c1bc742181ded4930842b46e9507372f0b1b963James DongqYi2 QN qYj2 9520c1bc742181ded4930842b46e9507372f0b1b963James DongqYi3 QN qYj7 9530c1bc742181ded4930842b46e9507372f0b1b963James DongqYi4 QN qYj5 9540c1bc742181ded4930842b46e9507372f0b1b963James DongqYi5 QN qYjt 9550c1bc742181ded4930842b46e9507372f0b1b963James DongqYi6 QN qYj1 
9560c1bc742181ded4930842b46e9507372f0b1b963James DongqYi7 QN qYj6 9570c1bc742181ded4930842b46e9507372f0b1b963James DongqYit QN qYj3 9580c1bc742181ded4930842b46e9507372f0b1b963James Dong 9590c1bc742181ded4930842b46e9507372f0b1b963James DongdYi0lo DN dYj0lo 9600c1bc742181ded4930842b46e9507372f0b1b963James DongdYi0hi DN dYj0hi 9610c1bc742181ded4930842b46e9507372f0b1b963James DongdYi1lo DN dYj4lo 9620c1bc742181ded4930842b46e9507372f0b1b963James DongdYi1hi DN dYj4hi 9630c1bc742181ded4930842b46e9507372f0b1b963James DongdYi2lo DN dYj2lo 9640c1bc742181ded4930842b46e9507372f0b1b963James DongdYi2hi DN dYj2hi 9650c1bc742181ded4930842b46e9507372f0b1b963James DongdYi3lo DN dYj7lo 9660c1bc742181ded4930842b46e9507372f0b1b963James DongdYi3hi DN dYj7hi 9670c1bc742181ded4930842b46e9507372f0b1b963James DongdYi4lo DN dYj5lo 9680c1bc742181ded4930842b46e9507372f0b1b963James DongdYi4hi DN dYj5hi 9690c1bc742181ded4930842b46e9507372f0b1b963James DongdYi5lo DN dYjtlo 9700c1bc742181ded4930842b46e9507372f0b1b963James DongdYi5hi DN dYjthi 9710c1bc742181ded4930842b46e9507372f0b1b963James DongdYi6lo DN dYj1lo 9720c1bc742181ded4930842b46e9507372f0b1b963James DongdYi6hi DN dYj1hi 9730c1bc742181ded4930842b46e9507372f0b1b963James DongdYi7lo DN dYj6lo 9740c1bc742181ded4930842b46e9507372f0b1b963James DongdYi7hi DN dYj6hi 9750c1bc742181ded4930842b46e9507372f0b1b963James DongdYitlo DN dYj3lo 9760c1bc742181ded4930842b46e9507372f0b1b963James DongdYithi DN dYj3hi 9770c1bc742181ded4930842b46e9507372f0b1b963James Dong 9780c1bc742181ded4930842b46e9507372f0b1b963James DongqYh0 QN qYit 9790c1bc742181ded4930842b46e9507372f0b1b963James DongqYh1 QN qYi0 9800c1bc742181ded4930842b46e9507372f0b1b963James DongqYh2 QN qYi2 9810c1bc742181ded4930842b46e9507372f0b1b963James DongqYh3 QN qYi3 9820c1bc742181ded4930842b46e9507372f0b1b963James DongqYh4 QN qYi7 9830c1bc742181ded4930842b46e9507372f0b1b963James DongqYh5 QN qYi5 9840c1bc742181ded4930842b46e9507372f0b1b963James DongqYh6 QN qYi4 
9850c1bc742181ded4930842b46e9507372f0b1b963James DongqYh7 QN qYi1 9860c1bc742181ded4930842b46e9507372f0b1b963James DongqYht QN qYi6 9870c1bc742181ded4930842b46e9507372f0b1b963James Dong 9880c1bc742181ded4930842b46e9507372f0b1b963James DongdYh0lo DN dYitlo 9890c1bc742181ded4930842b46e9507372f0b1b963James DongdYh0hi DN dYithi 9900c1bc742181ded4930842b46e9507372f0b1b963James DongdYh1lo DN dYi0lo 9910c1bc742181ded4930842b46e9507372f0b1b963James DongdYh1hi DN dYi0hi 9920c1bc742181ded4930842b46e9507372f0b1b963James DongdYh2lo DN dYi2lo 9930c1bc742181ded4930842b46e9507372f0b1b963James DongdYh2hi DN dYi2hi 9940c1bc742181ded4930842b46e9507372f0b1b963James DongdYh3lo DN dYi3lo 9950c1bc742181ded4930842b46e9507372f0b1b963James DongdYh3hi DN dYi3hi 9960c1bc742181ded4930842b46e9507372f0b1b963James DongdYh4lo DN dYi7lo 9970c1bc742181ded4930842b46e9507372f0b1b963James DongdYh4hi DN dYi7hi 9980c1bc742181ded4930842b46e9507372f0b1b963James DongdYh5lo DN dYi5lo 9990c1bc742181ded4930842b46e9507372f0b1b963James DongdYh5hi DN dYi5hi 10000c1bc742181ded4930842b46e9507372f0b1b963James DongdYh6lo DN dYi4lo 10010c1bc742181ded4930842b46e9507372f0b1b963James DongdYh6hi DN dYi4hi 10020c1bc742181ded4930842b46e9507372f0b1b963James DongdYh7lo DN dYi1lo 10030c1bc742181ded4930842b46e9507372f0b1b963James DongdYh7hi DN dYi1hi 10040c1bc742181ded4930842b46e9507372f0b1b963James DongdYhtlo DN dYi6lo 10050c1bc742181ded4930842b46e9507372f0b1b963James DongdYhthi DN dYi6hi 10060c1bc742181ded4930842b46e9507372f0b1b963James Dong 10070c1bc742181ded4930842b46e9507372f0b1b963James DongqYg0 QN qYh2 10080c1bc742181ded4930842b46e9507372f0b1b963James DongqYg1 QN qYht 10090c1bc742181ded4930842b46e9507372f0b1b963James DongqYg2 QN qYh1 10100c1bc742181ded4930842b46e9507372f0b1b963James DongqYg3 QN qYh0 10110c1bc742181ded4930842b46e9507372f0b1b963James DongqYg4 QN qYh4 10120c1bc742181ded4930842b46e9507372f0b1b963James DongqYg5 QN qYh5 10130c1bc742181ded4930842b46e9507372f0b1b963James DongqYg6 QN qYh6 
10140c1bc742181ded4930842b46e9507372f0b1b963James DongqYg7 QN qYh7 10150c1bc742181ded4930842b46e9507372f0b1b963James DongqYgt QN qYh3 10160c1bc742181ded4930842b46e9507372f0b1b963James Dong 10170c1bc742181ded4930842b46e9507372f0b1b963James DongqYf0 QN qYg6 10180c1bc742181ded4930842b46e9507372f0b1b963James DongqYf1 QN qYg5 10190c1bc742181ded4930842b46e9507372f0b1b963James DongqYf2 QN qYg4 10200c1bc742181ded4930842b46e9507372f0b1b963James DongqYf3 QN qYgt 10210c1bc742181ded4930842b46e9507372f0b1b963James DongqYf4 QN qYg3 10220c1bc742181ded4930842b46e9507372f0b1b963James DongqYf5 QN qYg2 10230c1bc742181ded4930842b46e9507372f0b1b963James DongqYf6 QN qYg1 10240c1bc742181ded4930842b46e9507372f0b1b963James DongqYf7 QN qYg0 10250c1bc742181ded4930842b46e9507372f0b1b963James DongqYft QN qYg7 10260c1bc742181ded4930842b46e9507372f0b1b963James Dong 10270c1bc742181ded4930842b46e9507372f0b1b963James Dong VRSHR qYj7, qYj7, #2 10280c1bc742181ded4930842b46e9507372f0b1b963James Dong VRSHR qYj6, qYj6, #1 10290c1bc742181ded4930842b46e9507372f0b1b963James Dong 10300c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2 10310c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7 10320c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2 10330c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6 10340c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2 10350c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3 10360c1bc742181ded4930842b46e9507372f0b1b963James Dong 10370c1bc742181ded4930842b46e9507372f0b1b963James Dong VQDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2) 10380c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 4,3 rows 0to1 x 1/2 10390c1bc742181ded4930842b46e9507372f0b1b963James Dong 10400c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV pTemp, #0x4 ;// ensure 
correct round 10410c1bc742181ded4930842b46e9507372f0b1b963James Dong VDUP qScale1, pTemp ;// of DC result 10420c1bc742181ded4930842b46e9507372f0b1b963James Dong VADD qYi0, qYi0, qScale1 10430c1bc742181ded4930842b46e9507372f0b1b963James Dong 10440c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2 10450c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2 10460c1bc742181ded4930842b46e9507372f0b1b963James Dong 10470c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4 10480c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2 10490c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYh2, qYi2, qYi3 ;// h2, h3 10500c1bc742181ded4930842b46e9507372f0b1b963James Dong VQDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2) 10510c1bc742181ded4930842b46e9507372f0b1b963James Dong 10520c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt0, dYi4lo, C ;// c*i4 10530c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6 10540c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt1, dYi4hi, C 10550c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLAL qXt1, dYi6hi, S 10560c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh4lo, qXt0, #16 ;// h4 10570c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh4hi, qXt1, #16 10580c1bc742181ded4930842b46e9507372f0b1b963James Dong 10590c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt0, dYi6lo, C ;// c*i6 10600c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*h6 10610c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt1, dYi6hi, C 10620c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLSL qXt1, dYi4hi, S 10630c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh6lo, qXt0, #16 ;// h6 10640c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh6hi, qXt1, #16 
10650c1bc742181ded4930842b46e9507372f0b1b963James Dong 10660c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYg6, qYh6, qYh7 10670c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYg5, qYh5, qYg6 10680c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYg4, qYh4, qYg5 10690c1bc742181ded4930842b46e9507372f0b1b963James Dong 10700c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 2 rows 0to3 x 1/2 10710c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2 10720c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2 10730c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2 10740c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2 10750c1bc742181ded4930842b46e9507372f0b1b963James Dong 10760c1bc742181ded4930842b46e9507372f0b1b963James Dong 10770c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 1 all rows 10780c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf3, qYg3, qYg4 10790c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf4, qYg3, qYg4 10800c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf2, qYg2, qYg5 10810c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf5, qYg2, qYg5 10820c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf1, qYg1, qYg6 10830c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf6, qYg1, qYg6 10840c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf0, qYg0, qYg7 10850c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf7, qYg0, qYg7 10860c1bc742181ded4930842b46e9507372f0b1b963James Dong 10870c1bc742181ded4930842b46e9507372f0b1b963James DongYTR0 EQU Src0 10880c1bc742181ded4930842b46e9507372f0b1b963James DongYTR1 EQU Src4 10890c1bc742181ded4930842b46e9507372f0b1b963James DongYTR2 EQU Src1 10900c1bc742181ded4930842b46e9507372f0b1b963James DongYTR3 EQU Src2 10910c1bc742181ded4930842b46e9507372f0b1b963James DongYTR4 EQU Src7 
10920c1bc742181ded4930842b46e9507372f0b1b963James DongYTR5 EQU Src5 10930c1bc742181ded4930842b46e9507372f0b1b963James DongYTR6 EQU Tmp 10940c1bc742181ded4930842b46e9507372f0b1b963James DongYTR7 EQU Src6 10950c1bc742181ded4930842b46e9507372f0b1b963James DongYTRt EQU Src3 10960c1bc742181ded4930842b46e9507372f0b1b963James Dong 10970c1bc742181ded4930842b46e9507372f0b1b963James DongqC0 QN YTR0.S32 ;// for YTRpose 10980c1bc742181ded4930842b46e9507372f0b1b963James DongqC1 QN YTR1.S32 10990c1bc742181ded4930842b46e9507372f0b1b963James DongqC2 QN YTR2.S32 11000c1bc742181ded4930842b46e9507372f0b1b963James DongqC3 QN YTR3.S32 11010c1bc742181ded4930842b46e9507372f0b1b963James DongqC4 QN YTR4.S32 11020c1bc742181ded4930842b46e9507372f0b1b963James DongqC5 QN YTR5.S32 11030c1bc742181ded4930842b46e9507372f0b1b963James DongqC6 QN YTR6.S32 11040c1bc742181ded4930842b46e9507372f0b1b963James DongqC7 QN YTR7.S32 11050c1bc742181ded4930842b46e9507372f0b1b963James Dong 11060c1bc742181ded4930842b46e9507372f0b1b963James DongdD0 DN YTR0*2+1 ;// for using VSWP 11070c1bc742181ded4930842b46e9507372f0b1b963James DongdD1 DN YTR1*2+1 11080c1bc742181ded4930842b46e9507372f0b1b963James DongdD2 DN YTR2*2+1 11090c1bc742181ded4930842b46e9507372f0b1b963James DongdD3 DN YTR3*2+1 11100c1bc742181ded4930842b46e9507372f0b1b963James DongdD4 DN YTR4*2 11110c1bc742181ded4930842b46e9507372f0b1b963James DongdD5 DN YTR5*2 11120c1bc742181ded4930842b46e9507372f0b1b963James DongdD6 DN YTR6*2 11130c1bc742181ded4930842b46e9507372f0b1b963James DongdD7 DN YTR7*2 11140c1bc742181ded4930842b46e9507372f0b1b963James Dong 11150c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf0, qYf1 11160c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf2, qYf3 11170c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf4, qYf5 11180c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf6, qYf7 11190c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC0, qC2 11200c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN 
qC1, qC3 11210c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC4, qC6 11220c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC5, qC7 11230c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD0, dD4 11240c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD1, dD5 11250c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD2, dD6 11260c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD3, dD7 11270c1bc742181ded4930842b46e9507372f0b1b963James Dong 11280c1bc742181ded4930842b46e9507372f0b1b963James Dong 11290c1bc742181ded4930842b46e9507372f0b1b963James DongdYf0U8 DN YTR0*2.U8 11300c1bc742181ded4930842b46e9507372f0b1b963James DongdYf1U8 DN YTR1*2.U8 11310c1bc742181ded4930842b46e9507372f0b1b963James DongdYf2U8 DN YTR2*2.U8 11320c1bc742181ded4930842b46e9507372f0b1b963James DongdYf3U8 DN YTR3*2.U8 11330c1bc742181ded4930842b46e9507372f0b1b963James DongdYf4U8 DN YTR4*2.U8 11340c1bc742181ded4930842b46e9507372f0b1b963James DongdYf5U8 DN YTR5*2.U8 11350c1bc742181ded4930842b46e9507372f0b1b963James DongdYf6U8 DN YTR6*2.U8 11360c1bc742181ded4930842b46e9507372f0b1b963James DongdYf7U8 DN YTR7*2.U8 11370c1bc742181ded4930842b46e9507372f0b1b963James Dong 11380c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 11390c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Do saturation if outsize is other than S16 11400c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 11410c1bc742181ded4930842b46e9507372f0b1b963James Dong 11420c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ("$outsize"="u8") 11430c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output range [0-255] 11440c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf0U8, qYf0 11450c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf1U8, qYf1 11460c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf2U8, qYf2 11470c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf3U8, qYf3 11480c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf4U8, qYf4 
11490c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf5U8, qYf5 11500c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf6U8, qYf6 11510c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf7U8, qYf7 11520c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 11530c1bc742181ded4930842b46e9507372f0b1b963James Dong 11540c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ("$outsize"="s9") 11550c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output range [-256 to +255] 11560c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf0, qYf0, #16-9 11570c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf1, qYf1, #16-9 11580c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf2, qYf2, #16-9 11590c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf3, qYf3, #16-9 11600c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf4, qYf4, #16-9 11610c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf5, qYf5, #16-9 11620c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf6, qYf6, #16-9 11630c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf7, qYf7, #16-9 11640c1bc742181ded4930842b46e9507372f0b1b963James Dong 11650c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf0, qYf0, #16-9 11660c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf1, qYf1, #16-9 11670c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf2, qYf2, #16-9 11680c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf3, qYf3, #16-9 11690c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf4, qYf4, #16-9 11700c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf5, qYf5, #16-9 11710c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf6, qYf6, #16-9 11720c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf7, qYf7, #16-9 11730c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 11740c1bc742181ded4930842b46e9507372f0b1b963James Dong 11750c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Store 
output depending on the Stride size 11760c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$stride"="s" 11770c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf0, [pDest @64], Stride 11780c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf1, [pDest @64], Stride 11790c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf2, [pDest @64], Stride 11800c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf3, [pDest @64], Stride 11810c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf4, [pDest @64], Stride 11820c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf5, [pDest @64], Stride 11830c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf6, [pDest @64], Stride 11840c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf7, [pDest @64] 11850c1bc742181ded4930842b46e9507372f0b1b963James Dong ELSE 11860c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ("$outsize"="u8") 11870c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf0U8, [pDest @64], #8 11880c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf1U8, [pDest @64], #8 11890c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf2U8, [pDest @64], #8 11900c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf3U8, [pDest @64], #8 11910c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf4U8, [pDest @64], #8 11920c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf5U8, [pDest @64], #8 11930c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf6U8, [pDest @64], #8 11940c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf7U8, [pDest @64] 11950c1bc742181ded4930842b46e9507372f0b1b963James Dong ELSE 11960c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// ("$outsize"="s9") or ("$outsize"="s16") 11970c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf0, [pDest @64], #16 11980c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf1, [pDest @64], #16 11990c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf2, [pDest @64], #16 
12000c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf3, [pDest @64], #16 12010c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf4, [pDest @64], #16 12020c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf5, [pDest @64], #16 12030c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf6, [pDest @64], #16 12040c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf7, [pDest @64] 12050c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 12060c1bc742181ded4930842b46e9507372f0b1b963James Dong 12070c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 12080c1bc742181ded4930842b46e9507372f0b1b963James Dong 12090c1bc742181ded4930842b46e9507372f0b1b963James Dong 12100c1bc742181ded4930842b46e9507372f0b1b963James Dong 12110c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF ;// CortexA8 12120c1bc742181ded4930842b46e9507372f0b1b963James Dong 12130c1bc742181ded4930842b46e9507372f0b1b963James Dong 12140c1bc742181ded4930842b46e9507372f0b1b963James Dong 12150c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 12160c1bc742181ded4930842b46e9507372f0b1b963James Dong 12170c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale TWO input rows with TWO rows of 16 bit scale values 12180c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12190c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale one row 12200c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// input (Eight input values) with one row of scale values. Also 12210c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Loads next scale values from pScale, if $LastRow flag is not set. 
12220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12230c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Registers: 12240c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12250c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAlo - Input D register with first four S16 values of row n 12260c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAhi - Input D register with next four S16 values of row n 12270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBlo - Input D register with first four S16 values of row n+1 12280c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBhi - Input D register with next four S16 values of row n+1 12290c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to next row of scale values 12300c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT0lo - Temporary scratch register 12310c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT0hi - Temporary scratch register 12320c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT1lo - Temporary scratch register 12330c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT1hi - Temporary scratch register 12340c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale1lo - Scale value of row n 12350c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale1hi - Scale value of row n 12360c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale2lo - Scale value of row n+1 12370c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale2hi - Scale value of row n+1 12380c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12390c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Flag 12400c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12410c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $LastRow - Flag to indicate whether current row is last row 12420c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12430c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output Registers: 
12440c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12450c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAlo - Scaled output values (first four S16 of row n) 12460c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAhi - Scaled output values (next four S16 of row n) 12470c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBlo - Scaled output values (first four S16 of row n+1) 12480c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBhi - Scaled output values (next four S16 of row n+1) 12490c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qScale1 - Scale values for row n+2 (reloaded only when $LastRow is 0) 12500c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qScale2 - Scale values for row n+3 (reloaded only when $LastRow is 0) 12510c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to next row of scale values 12520c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12530c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 12540c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow 12550c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT0lo, $dAlo, dScale1lo ;// widening multiply: row n, first four values 12560c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT0hi, $dAhi, dScale1hi ;// widening multiply: row n, next four values 12570c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT1lo, $dBlo, dScale2lo ;// widening multiply: row n+1, first four values 12580c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT1hi, $dBhi, dScale2hi ;// widening multiply: row n+1, next four values 12590c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$LastRow"="0" 12600c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2 (next row pair), pScale += 16 12610c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3 (next row pair), pScale += 16 12620c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 12630c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dAlo, qT0lo, #12 ;// rounding shift right by 12, saturate and narrow back in place 12640c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dAhi, qT0hi, #12 12650c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dBlo, qT1lo, #12 
12660c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dBhi, qT1hi, #12 12670c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 12680c1bc742181ded4930842b46e9507372f0b1b963James Dong 12690c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale 8x8 block input values with 16 bit scale values 12700c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12710c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This macro is used to pre-scale a block of 8x8 input values. 12720c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// It also performs the 1st stage transformations of the IDCT. 12730c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12740c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Registers: 12750c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12760c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXjnlo - n th input D register with first four S16 values 12770c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXjnhi - n th input D register with next four S16 values 12780c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qXjn - n th input Q register with eight S16 values 12790c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to scale values 12800c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12810c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output Registers: 12820c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12830c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qXin - n th output Q register with eight S16 output values of 1st stage 12840c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12850c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 12860c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_PRESCALE16 12870c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0 12880c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1 
12890c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1 12900c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3 12910c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5 12920c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7; last pair, so no further scale load 12930c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2 12940c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qXi6, qXj1, qXj7 ;// j1-j7 12950c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants (pSrc is overwritten here) 12960c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2 12970c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qXi2, qXj2, qXj6 ;// j2-j6 12980c1bc742181ded4930842b46e9507372f0b1b963James Dong VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants 12990c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2 13000c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qXi4, qXj5, qXj3 ;// j5-j3 13010c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 13020c1bc742181ded4930842b46e9507372f0b1b963James Dong 13030c1bc742181ded4930842b46e9507372f0b1b963James Dong 13040c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale 8x8 block input values with 32 bit scale values 13050c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 13060c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This macro is used to pre-scale a block of 8x8 input values. 13070c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// It also performs the 1st stage transformations of the IDCT. 
;//
;// Input Registers:
;//
;// dXjnlo      - n th input D register with first four S16 values
;// dXjnhi      - n th input D register with next four S16 values
;// qXjn        - n th input Q register with eight S16 values
;// pScale      - Pointer to 32bit scale values in Q23 format
;//
;// Output Registers:
;//
;// dXinlo      - n th output D register with first four S16 output values of 1st stage
;// dXinhi      - n th output D register with next four S16 output values of 1st stage
;//
;// Effects visible in this macro body:
;//
;// pScale      - advanced by four 32-byte rows (the fifth load has no writeback)
;// pTemp       - scratch pointer walking the scale table backwards from row 7
;// pSrc        - moved back by 64 bytes to reload qXj4, then overwritten with
;//               the address of armCOMM_IDCTCoef
;// dCoefs      - loaded from [pSrc] (DCT inverse AAN constants)
;// qXj4        - reloaded from memory before row 4 is scaled
;//
;// Register reuse: only two register pairs hold scale values and two hold
;// widened source rows, so every alias below shares registers with other
;// aliases. The instruction order is therefore significant: an alias may
;// only be written once the previous user of the same registers is dead.
;//
        MACRO
        M_IDCT_PRESCALE32

;// Scale values: rows 0, 5, 6, 7 share Q0/Q1; rows 1, 2, 3, 4 share Q2/Q3
qScale0lo       QN 0.S32
qScale0hi       QN 1.S32
qScale1lo       QN 2.S32
qScale1hi       QN 3.S32
qScale2lo       QN qScale1lo
qScale2hi       QN qScale1hi
qScale3lo       QN qScale1lo
qScale3hi       QN qScale1hi
qScale4lo       QN qScale1lo
qScale4hi       QN qScale1hi
qScale5lo       QN qScale0lo
qScale5hi       QN qScale0hi
qScale6lo       QN qScale0lo
qScale6hi       QN qScale0hi
qScale7lo       QN qScale0lo
qScale7hi       QN qScale0hi

;// Widened (S32) source rows: rows 0, 2, 3, 4, 7 share one pair,
;// rows 1, 5, 6 share the other.
;// NOTE(review): Src4 is not defined in this section; presumably it is a
;// register-number symbol defined earlier that names the register of qXj4,
;// which would explain why qXj4 is reloaded from memory below -- confirm.
qSrc0lo         QN 4.S32
qSrc0hi         QN 5.S32
qSrc1lo         QN 6.S32
qSrc1hi         QN Src4.S32
qSrc2lo         QN qSrc0lo
qSrc2hi         QN qSrc0hi
qSrc3lo         QN qSrc0lo
qSrc3hi         QN qSrc0hi
qSrc4lo         QN qSrc0lo
qSrc4hi         QN qSrc0hi
qSrc5lo         QN qSrc1lo
qSrc5hi         QN qSrc1hi
qSrc6lo         QN qSrc1lo
qSrc6hi         QN qSrc1hi
qSrc7lo         QN qSrc0lo
qSrc7hi         QN qSrc0hi

;// 1st stage sums/differences reuse the Q0/Q1 scale registers
qRes17lo        QN qScale0lo
qRes17hi        QN qScale0hi
qRes26lo        QN qScale0lo
qRes26hi        QN qScale0hi
qRes53lo        QN qScale0lo
qRes53hi        QN qScale0hi

        ;// A scale row is 8 values * 4 bytes = 32 bytes
        ADD         pTemp, pScale, #4*8*7               ;// Address of pScale[7]

        ;// Row 0: scale and output; start widening and scaling rows 1 & 7.
        ;// VSHLL widens each S16 lane to S32 while shifting left by (12-1),
        ;// VQRDMULH is a saturating rounding doubling multiply returning the
        ;// high half, VMOVN narrows each S32 lane back to 16 bits.
        VLD1        {qScale0lo, qScale0hi}, [pScale]!
        VSHLL       qSrc0lo, dXj0lo, #(12-1)
        VSHLL       qSrc0hi, dXj0hi, #(12-1)
        VLD1        {qScale1lo, qScale1hi}, [pScale]!
        VQRDMULH    qSrc0lo, qScale0lo, qSrc0lo
        VQRDMULH    qSrc0hi, qScale0hi, qSrc0hi
        VLD1        {qScale7lo, qScale7hi}, [pTemp]!    ;// Scale row 7 (reuses the row 0 scale registers)
        VSHLL       qSrc1lo, dXj1lo, #(12-1)
        VSHLL       qSrc1hi, dXj1hi, #(12-1)
        VMOVN       dXi0lo, qSrc0lo                     ;// Output i0
        VMOVN       dXi0hi, qSrc0hi
        VSHLL       qSrc7lo, dXj7lo, #(12-1)            ;// Row 7 reuses the row 0 source registers
        VSHLL       qSrc7hi, dXj7hi, #(12-1)
        ;// Undo the 32-byte writeback and step back one more row: pTemp -> pScale[6]
        SUB         pTemp, pTemp, #((16*2)+(4*8*1))
        VQRDMULH    qSrc1lo, qScale1lo, qSrc1lo
        VQRDMULH    qSrc1hi, qScale1hi, qSrc1hi
        VQRDMULH    qSrc7lo, qScale7lo, qSrc7lo
        VQRDMULH    qSrc7hi, qScale7hi, qSrc7hi
        VLD1        {qScale2lo, qScale2hi}, [pScale]!

        ;// Row 1 & 7: combine and output; start widening and scaling rows 2 & 6
        VHADD       qRes17lo, qSrc1lo, qSrc7lo          ;// (j1+j7)/2
        VHADD       qRes17hi, qSrc1hi, qSrc7hi          ;// (j1+j7)/2
        VMOVN       dXi5lo, qRes17lo                    ;// Output i5
        VMOVN       dXi5hi, qRes17hi
        VSUB        qRes17lo, qSrc1lo, qSrc7lo          ;// j1-j7
        VSUB        qRes17hi, qSrc1hi, qSrc7hi          ;// j1-j7
        VMOVN       dXi6lo, qRes17lo                    ;// Output i6
        VMOVN       dXi6hi, qRes17hi
        VSHLL       qSrc2lo, dXj2lo, #(12-1)
        VSHLL       qSrc2hi, dXj2hi, #(12-1)
        VLD1        {qScale6lo, qScale6hi}, [pTemp]!
        VSHLL       qSrc6lo, dXj6lo, #(12-1)
        VSHLL       qSrc6hi, dXj6hi, #(12-1)
        ;// Undo the 32-byte writeback and step back one more row: pTemp -> pScale[5]
        SUB         pTemp, pTemp, #((16*2)+(4*8*1))
        VQRDMULH    qSrc2lo, qScale2lo, qSrc2lo
        VQRDMULH    qSrc2hi, qScale2hi, qSrc2hi
        VQRDMULH    qSrc6lo, qScale6lo, qSrc6lo
        VQRDMULH    qSrc6hi, qScale6hi, qSrc6hi
        VLD1        {qScale3lo, qScale3hi}, [pScale]!

        ;// Row 2 & 6: combine and output; start widening and scaling rows 3 & 5
        VHADD       qRes26lo, qSrc2lo, qSrc6lo          ;// (j2+j6)/2
        VHADD       qRes26hi, qSrc2hi, qSrc6hi          ;// (j2+j6)/2
        VMOVN       dXi3lo, qRes26lo                    ;// Output i3
        VMOVN       dXi3hi, qRes26hi
        VSUB        qRes26lo, qSrc2lo, qSrc6lo          ;// j2-j6
        VSUB        qRes26hi, qSrc2hi, qSrc6hi          ;// j2-j6
        VMOVN       dXi2lo, qRes26lo                    ;// Output i2
        VMOVN       dXi2hi, qRes26hi
        VSHLL       qSrc3lo, dXj3lo, #(12-1)
        VSHLL       qSrc3hi, dXj3hi, #(12-1)
        VLD1        {qScale5lo, qScale5hi}, [pTemp]!    ;// Last use of pTemp, no rewind needed
        VSHLL       qSrc5lo, dXj5lo, #(12-1)
        VSHLL       qSrc5hi, dXj5hi, #(12-1)
        VQRDMULH    qSrc3lo, qScale3lo, qSrc3lo
        VQRDMULH    qSrc3hi, qScale3hi, qSrc3hi
        VQRDMULH    qSrc5lo, qScale5lo, qSrc5lo
        VQRDMULH    qSrc5hi, qScale5hi, qSrc5hi

        ;// Row 3 & 5: combine and output; reload, widen and scale row 4
        VHADD       qRes53lo, qSrc5lo, qSrc3lo          ;// (j5+j3)/2
        VHADD       qRes53hi, qSrc5hi, qSrc3hi          ;// (j5+j3)/2
        SUB         pSrc, pSrc, #16*2*2                 ;// Step pSrc back 64 bytes for the qXj4 reload
        VMOVN       dXi7lo, qRes53lo                    ;// Output i7
        VMOVN       dXi7hi, qRes53hi
        VSUB        qRes53lo, qSrc5lo, qSrc3lo          ;// j5-j3
        VSUB        qRes53hi, qSrc5hi, qSrc3hi          ;// j5-j3
        VLD1        qXj4, [pSrc @64]                    ;// Reload input row 4 from memory
        VMOVN       dXi4lo, qRes53lo                    ;// Output i4
        VMOVN       dXi4hi, qRes53hi
        VSHLL       qSrc4lo, dXj4lo, #(12-1)
        VSHLL       qSrc4hi, dXj4hi, #(12-1)
        VLD1        {qScale4lo, qScale4hi}, [pScale]    ;// No writeback: last scale row read via pScale
        LDR         pSrc, =armCOMM_IDCTCoef             ;// Address of DCT inverse AAN constants
        VQRDMULH    qSrc4lo, qScale4lo, qSrc4lo
        VQRDMULH    qSrc4hi, qScale4hi, qSrc4hi
        VLDR        dCoefs, [pSrc]                      ;// Load DCT inverse AAN constants

        ;// Row 4
        VMOVN       dXi1lo, qSrc4lo                     ;// Output i1
        VMOVN       dXi1hi, qSrc4hi

        MEND

        END