10c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2004 ARM Limited 378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License"); 578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License. 678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at 778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// http://www.apache.org/licenses/LICENSE-2.0 978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software 1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS, 1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and 1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License. 
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// 170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IDCT_s.s 190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Inverse DCT module 210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// ALGORITHM DESCRIPTION 240c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each 260c1bc742181ded4930842b46e9507372f0b1b963James Dong;// column and then a 1D IDCT for each row. 270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 280c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The 8-point 1D IDCT is defined by 290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2 300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0 320c1bc742181ded4930842b46e9507372f0b1b963James Dong;// c(u,x) = cos( (2x+1)*u*pi/16 ) 330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// We compute the 8-point 1D IDCT using the reverse of 350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the Arai-Agui-Nakajima flow graph which we split into 360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 5 stages named in reverse order to identify with the 370c1bc742181ded4930842b46e9507372f0b1b963James Dong;// forward DCT. 
Direct inversion of the forward formulae 380c1bc742181ded4930842b46e9507372f0b1b963James Dong;// in file FDCT_s.s gives: 390c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 400c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ] 410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [ A(0) = 2*sqrt(2) 420c1bc742181ded4930842b46e9507372f0b1b963James Dong;// A(u) = 4*cos(u*pi/16) for (u!=0) ] 430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 440c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 4: i0 = j0 i1 = j4 450c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i3 = (j2+j6)/2 i2 = (j2-j6)/2 460c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i7 = (j5+j3)/2 i4 = (j5-j3)/2 470c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i5 = (j1+j7)/2 i6 = (j1-j7)/2 480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2 500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h2 = (i2*sqrt2)-i3 h3 = i3 510c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6 520c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6 530c1bc742181ded4930842b46e9507372f0b1b963James Dong;// [ The above two lines rotate by -(pi/8) ] 540c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2 550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2 570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g1 = (h1+h2)/2 g2 = (h1-h2)/2 580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g7 = h7 g6 = h6 - h7 590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g5 = h5 - g6 g4 = h4 - g5 600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2 
620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f1 = (g1+g6)/2 f6 = (g1-g6)/2 630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f2 = (g2+g5)/2 f5 = (g2-g5)/2 640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f3 = (g3+g4)/2 f4 = (g3-g4)/2 650c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note that most coefficients are halved 3 times during the 670c1bc742181ded4930842b46e9507372f0b1b963James Dong;// above calculation. We can rescale the algorithm dividing 680c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the input by 8 to remove the halvings. 690c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 700c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 5: j(u) = T(u)*A(u)/8 710c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 720c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 4: i0 = j0 i1 = j4 730c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i3 = j2 + j6 i2 = j2 - j6 740c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i7 = j5 + j3 i4 = j5 - j3 750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// i5 = j1 + j7 i6 = j1 - j7 760c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 770c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3: h0 = i0 + i1 h1 = i0 - i1 780c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h2 = (i2*sqrt2)-i3 h3 = i3 790c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6) 800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6) 810c1bc742181ded4930842b46e9507372f0b1b963James Dong;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7 820c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 830c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2: g0 = h0 + h3 g3 = h0 - h3 840c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g1 = h1 + h2 g2 = h1 - h2 850c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g7 = h7 g6 = h6 - h7 
860c1bc742181ded4930842b46e9507372f0b1b963James Dong;// g5 = h5 - g6 g4 = h4 - g5 870c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1: f0 = g0 + g7 f7 = g0 - g7 890c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f1 = g1 + g6 f6 = g1 - g6 900c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f2 = g2 + g5 f5 = g2 - g5 910c1bc742181ded4930842b46e9507372f0b1b963James Dong;// f3 = g3 + g4 f4 = g3 - g4 920c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note: 940c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1. The scaling by A(u)/8 can often be combined with inverse 950c1bc742181ded4930842b46e9507372f0b1b963James Dong;// quantization. The column and row scalings can be combined. 960c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2. The flowgraph in the AAN paper has h4,g6 negated compared 970c1bc742181ded4930842b46e9507372f0b1b963James Dong;// to the above code but is otherwise identical. 980c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 3. The rotation by -pi/8 can be performed using three multiplies 990c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4 1000c1bc742181ded4930842b46e9507372f0b1b963James Dong;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6 1010c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 4. If |T(u)|<=1 then from the IDCT definition, 1020c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2 1030c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = ((1/sqrt2) + cos(pi/16) + ... 
+ cos(7*pi/16))/2 1040c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2 1050c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2) 1060c1bc742181ded4930842b46e9507372f0b1b963James Dong;// = (approx)2.64 1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits. 1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The table below shows input patterns generating the maximum 1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1 1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;// InputPattern Max |f(x)| 1110c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPPPPPPP |f0| = 2.64 1120c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPPMMMMM |f1| = 2.64 1130c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPMMMPPP |f2| = 2.64 1140c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PPMMPPMM |f3| = 2.64 1150c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMMPPMMP |f4| = 2.64 1160c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMMPMMPM |f5| = 2.64 1170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMPPMPMP |f6| = 2.64 1180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// PMPMPMPM |f7| = 2.64 1190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note that this input pattern is the transpose of the 1200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// corresponding max input pattern for the FDCT. 
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong 1220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Arguments 1230c1bc742181ded4930842b46e9507372f0b1b963James Dong 1240c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc RN 0 ;// source data buffer 1250c1bc742181ded4930842b46e9507372f0b1b963James DongStride RN 1 ;// destination stride in bytes (NOTE(review): the Inputs list in the macro header calls this the stride between input lines - confirm which buffer it applies to) 1260c1bc742181ded4930842b46e9507372f0b1b963James DongpDest RN 2 ;// destination data buffer 1270c1bc742181ded4930842b46e9507372f0b1b963James DongpScale RN 3 ;// pointer to scaling table 1280c1bc742181ded4930842b46e9507372f0b1b963James Dong 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// DCT Inverse Macro 1310c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// The DCT code should be parametrized according 1320c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// to the following inputs: 1330c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255) 1340c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255) 1350c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273) 1360c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment 1370c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment ;// $stride = "s" : Stride (r1) is saved with M_STR to pStride on entry in the ARM1136JS path, before r1 is reused as ra01 (NOTE(review): other accepted values of $stride are not documented here - confirm) 1380c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 1390c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Inputs: 1400c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pSrc = r0 = Pointer to input data 1410c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Range is -256 to +255 (9-bit) 1420c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Stride = r1 = Stride between input lines 1430c1bc742181ded4930842b46e9507372f0b1b963James 
Dong ;// pDest = r2 = Pointer to output data 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong 1460c1bc742181ded4930842b46e9507372f0b1b963James Dong 1470c1bc742181ded4930842b46e9507372f0b1b963James Dong 1480c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 1490c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT $outsize, $inscale, $stride 1500c1bc742181ded4930842b46e9507372f0b1b963James Dong LCLA SHIFT 1510c1bc742181ded4930842b46e9507372f0b1b963James Dong 1520c1bc742181ded4930842b46e9507372f0b1b963James Dong 1530c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ARM1136JS 1540c1bc742181ded4930842b46e9507372f0b1b963James Dong 1550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// REGISTER ALLOCATION 1560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This is hard since we have 8 values, 9 free registers and each 1570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// butterfly requires a temporary register. We also want to 1580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// maintain register order so we can use LDM/STM. The table below 1590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// summarises the register allocation that meets all these criteria. 1600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above. 
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r1 a01 g0 h0 1630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r4 b01 f0 g1 h1 i0 1640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r5 a23 f1 g2 i1 1650c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r6 b23 f2 g3 h2 i2 1660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r7 a45 f3 h3 i3 1670c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r8 b45 f4 g4 h4 i4 1680c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r9 a67 f5 g5 h5 i5 1690c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r10 b67 f6 g6 h6 i6 1700c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r11 f7 g7 h7 i7 1710c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1720c1bc742181ded4930842b46e9507372f0b1b963James Dongra01 RN 1 1730c1bc742181ded4930842b46e9507372f0b1b963James Dongrb01 RN 4 1740c1bc742181ded4930842b46e9507372f0b1b963James Dongra23 RN 5 1750c1bc742181ded4930842b46e9507372f0b1b963James Dongrb23 RN 6 1760c1bc742181ded4930842b46e9507372f0b1b963James Dongra45 RN 7 1770c1bc742181ded4930842b46e9507372f0b1b963James Dongrb45 RN 8 1780c1bc742181ded4930842b46e9507372f0b1b963James Dongra67 RN 9 1790c1bc742181ded4930842b46e9507372f0b1b963James Dongrb67 RN 10 1800c1bc742181ded4930842b46e9507372f0b1b963James Dongrtmp RN 11 1810c1bc742181ded4930842b46e9507372f0b1b963James DongcsPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ] 1820c1bc742181ded4930842b46e9507372f0b1b963James DongLoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ] 1830c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Transpose allocation 1840c1bc742181ded4930842b46e9507372f0b1b963James Dongxft RN ra01 1850c1bc742181ded4930842b46e9507372f0b1b963James Dongxf0 RN rb01 1860c1bc742181ded4930842b46e9507372f0b1b963James Dongxf1 RN ra23 1870c1bc742181ded4930842b46e9507372f0b1b963James Dongxf2 RN rb23 1880c1bc742181ded4930842b46e9507372f0b1b963James Dongxf3 RN ra45 
1890c1bc742181ded4930842b46e9507372f0b1b963James Dongxf4 RN rb45 1900c1bc742181ded4930842b46e9507372f0b1b963James Dongxf5 RN ra67 1910c1bc742181ded4930842b46e9507372f0b1b963James Dongxf6 RN rb67 1920c1bc742181ded4930842b46e9507372f0b1b963James Dongxf7 RN rtmp 1930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1 allocation 1940c1bc742181ded4930842b46e9507372f0b1b963James Dongxg0 RN xft 1950c1bc742181ded4930842b46e9507372f0b1b963James Dongxg1 RN xf0 1960c1bc742181ded4930842b46e9507372f0b1b963James Dongxg2 RN xf1 1970c1bc742181ded4930842b46e9507372f0b1b963James Dongxg3 RN xf2 1980c1bc742181ded4930842b46e9507372f0b1b963James Dongxgt RN xf3 1990c1bc742181ded4930842b46e9507372f0b1b963James Dongxg4 RN xf4 2000c1bc742181ded4930842b46e9507372f0b1b963James Dongxg5 RN xf5 2010c1bc742181ded4930842b46e9507372f0b1b963James Dongxg6 RN xf6 2020c1bc742181ded4930842b46e9507372f0b1b963James Dongxg7 RN xf7 2030c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2 allocation 2040c1bc742181ded4930842b46e9507372f0b1b963James Dongxh0 RN xg0 2050c1bc742181ded4930842b46e9507372f0b1b963James Dongxh1 RN xg1 2060c1bc742181ded4930842b46e9507372f0b1b963James Dongxht RN xg2 2070c1bc742181ded4930842b46e9507372f0b1b963James Dongxh2 RN xg3 2080c1bc742181ded4930842b46e9507372f0b1b963James Dongxh3 RN xgt 2090c1bc742181ded4930842b46e9507372f0b1b963James Dongxh4 RN xg4 2100c1bc742181ded4930842b46e9507372f0b1b963James Dongxh5 RN xg5 2110c1bc742181ded4930842b46e9507372f0b1b963James Dongxh6 RN xg6 2120c1bc742181ded4930842b46e9507372f0b1b963James Dongxh7 RN xg7 2130c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3,4 allocation 2140c1bc742181ded4930842b46e9507372f0b1b963James Dongxit RN xh0 2150c1bc742181ded4930842b46e9507372f0b1b963James Dongxi0 RN xh1 2160c1bc742181ded4930842b46e9507372f0b1b963James Dongxi1 RN xht 2170c1bc742181ded4930842b46e9507372f0b1b963James Dongxi2 RN xh2 2180c1bc742181ded4930842b46e9507372f0b1b963James Dongxi3 RN xh3 
2190c1bc742181ded4930842b46e9507372f0b1b963James Dongxi4 RN xh4 2200c1bc742181ded4930842b46e9507372f0b1b963James Dongxi5 RN xh5 2210c1bc742181ded4930842b46e9507372f0b1b963James Dongxi6 RN xh6 2220c1bc742181ded4930842b46e9507372f0b1b963James Dongxi7 RN xh7 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong 2240c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR pDest, ppDest 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$stride"="s" 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong M_STR Stride, pStride 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2280c1bc742181ded4930842b46e9507372f0b1b963James Dong M_ADR pDest, pBlk 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR csPiBy8, =0x30fc7642 2300c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR LoopRR2, =0x00005a82 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong 2320c1bc742181ded4930842b46e9507372f0b1b963James Dongv6_idct_col$_F 2330c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load even values 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pSrc], #4 ;// j0 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pSrc, #4*16-4] ;// j4 2360c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pSrc, #2*16-4] ;// j2 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pSrc, #6*16-4] ;// j6 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale Even Values 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s16" ;// 16x16 mul 2410c1bc742181ded4930842b46e9507372f0b1b963James DongSHIFT SETA 12 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pScale], #4 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pScale, #4*16-4] 2440c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pScale, #2*16-4] 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xit, #1<<(SHIFT-1) 
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi3, xi0, xi4, xit 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi4, xi0, xi4, xit 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi0, xi1, xi5, xit 2490c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi5, xi1, xi5, xit 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi3, xi3, ASR #SHIFT 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi4, xi3, xi4, LSL #(16-SHIFT) 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pScale, #6*16-4] 2530c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi1, xi2, xi6, xit 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi6, xi2, xi6, xit 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi0, xi0, ASR #SHIFT 2560c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi5, xi0, xi5, LSL #(16-SHIFT) 2570c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi2, xi3, xi7, xit 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi7, xi3, xi7, xit 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi1, xi1, ASR #SHIFT 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi6, xi1, xi6, LSL #(16-SHIFT) 2610c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, ASR #SHIFT 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi7, xi2, xi7, LSL #(16-SHIFT) 2630c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s32" ;// 32x16 mul 2650c1bc742181ded4930842b46e9507372f0b1b963James DongSHIFT SETA (12+8-16) 2660c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xit, #1<<(SHIFT-1) 2670c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pScale], #8 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pScale, #0*32+4-8] 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pScale, #4*32-8] 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR 
xi3, [pScale, #4*32+4-8] 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi0, xi0, xi4, xit 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi1, xi1, xi4, xit 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi2, xi2, xi5, xit 2740c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi3, xi3, xi5, xit 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi0, xi0, ASR #SHIFT 2760c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi4, xi0, xi1, LSL #(16-SHIFT) 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, ASR #SHIFT 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi5, xi2, xi3, LSL #(16-SHIFT) 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pScale, #2*32-8] 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi1, [pScale, #2*32+4-8] 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pScale, #6*32-8] 2820c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pScale, #6*32+4-8] 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi0, xi0, xi6, xit 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi1, xi1, xi6, xit 2850c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi2, xi2, xi7, xit 2860c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi3, xi3, xi7, xit 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi0, xi0, ASR #SHIFT 2880c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi6, xi0, xi1, LSL #(16-SHIFT) 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, ASR #SHIFT 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi7, xi2, xi3, LSL #(16-SHIFT) 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong 2930c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Load odd values 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi0, [pSrc, #1*16-4] ;// j1 2950c1bc742181ded4930842b46e9507372f0b1b963James 
Dong LDR xi1, [pSrc, #7*16-4] ;// j7 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi2, [pSrc, #5*16-4] ;// j5 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi3, [pSrc, #3*16-4] ;// j3 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong 2990c1bc742181ded4930842b46e9507372f0b1b963James Dong IF {TRUE} 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// shortcut if odd values 0 3010c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQ xi0, #0 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQEQ xi1, #0 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQEQ xi2, #0 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong TEQEQ xi3, #0 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong BEQ v6OddZero$_F 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Store scaled even values 3090c1bc742181ded4930842b46e9507372f0b1b963James Dong STMIA pDest, {xi4, xi5, xi6, xi7} 3100c1bc742181ded4930842b46e9507372f0b1b963James Dong 3110c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale odd values 3120c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s16" 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Perform AAN Scale 3140c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pScale, #1*16-4] 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pScale, #7*16-4] 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pScale, #5*16-4] 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi7, xi0, xi4, xit 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi0, xi0, xi4, xit 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi4, xi1, xi5, xit 3200c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi1, xi1, xi5, xit 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi7, xi7, ASR #SHIFT 
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi0, xi7, xi0, LSL #(16-SHIFT) 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pScale, #3*16-4] 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi5, xi2, xi6, xit 3250c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi2, xi2, xi6, xit 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi4, xi4, ASR #SHIFT 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi1, xi4, xi1, LSL #(16-SHIFT) 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB xi6, xi3, xi7, xit 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATT xi3, xi3, xi7, xit 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi5, xi5, ASR #SHIFT 3310c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi2, xi5, xi2, LSL #(16-SHIFT) 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi6, xi6, ASR #SHIFT 3330c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi3, xi6, xi3, LSL #(16-SHIFT) 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 3350c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$inscale"="s32" ;// 32x16 mul 3360c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pScale, #1*32-8] 3370c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pScale, #1*32+4-8] 3380c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pScale, #7*32-8] 3390c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pScale, #7*32+4-8] 3400c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi4, xi4, xi0, xit 3410c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi5, xi5, xi0, xit 3420c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi6, xi6, xi1, xit 3430c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi7, xi7, xi1, xit 3440c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi4, xi4, ASR #SHIFT 3450c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi0, xi4, xi5, LSL #(16-SHIFT) 
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi6, xi6, ASR #SHIFT 3470c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi1, xi6, xi7, LSL #(16-SHIFT) 3480c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi4, [pScale, #5*32-8] 3490c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi5, [pScale, #5*32+4-8] 3500c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi6, [pScale, #3*32-8] 3510c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xi7, [pScale, #3*32+4-8] 3520c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi4, xi4, xi2, xit 3530c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi5, xi5, xi2, xit 3540c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWB xi6, xi6, xi3, xit 3550c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAWT xi7, xi7, xi3, xit 3560c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi4, xi4, ASR #SHIFT 3570c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi2, xi4, xi5, LSL #(16-SHIFT) 3580c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi6, xi6, ASR #SHIFT 3590c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi3, xi6, xi7, LSL #(16-SHIFT) 3600c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 3610c1bc742181ded4930842b46e9507372f0b1b963James Dong 3620c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR xit, =0x00010001 ;// rounding constant 3630c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xi5, xi0, xi1 ;// (j1+j7)/2 3640c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xi5, xi5, xit 3650c1bc742181ded4930842b46e9507372f0b1b963James Dong 3660c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi6, xi0, xi1 ;// j1-j7 3670c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xi7, xi2, xi3 ;// (j5+j3)/2 3680c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xi7, xi7, xit 3690c1bc742181ded4930842b46e9507372f0b1b963James Dong 3700c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi4, xi2, xi3 ;// j5-j3 
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong 3720c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 3730c1bc742181ded4930842b46e9507372f0b1b963James Dong 3740c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a 3750c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b 3760c1bc742181ded4930842b46e9507372f0b1b963James Dong 3770c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] 3780c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] 3790c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] 3800c1bc742181ded4930842b46e9507372f0b1b963James Dong SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] 3810c1bc742181ded4930842b46e9507372f0b1b963James Dong 3820c1bc742181ded4930842b46e9507372f0b1b963James Dong SMULBB xi1, xi3, LoopRR2 3830c1bc742181ded4930842b46e9507372f0b1b963James Dong SMULTB xi3, xi3, LoopRR2 3840c1bc742181ded4930842b46e9507372f0b1b963James Dong 3850c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 3860c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 3870c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 3880c1bc742181ded4930842b46e9507372f0b1b963James Dong 3890c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// xi0,xi1,xi2,xi3 now free 3900c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 4,3, rows 2to3 x1/2 3910c1bc742181ded4930842b46e9507372f0b1b963James Dong 3920c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi3, xi3, LSL #1 3930c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 3940c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRD xi0, [pDest, #8] ;// j2,j6 scaled 3950c1bc742181ded4930842b46e9507372f0b1b963James Dong 
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 2, rows4to7 3970c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xg6, xh6, xh7 3980c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xg5, xh5, xg6 3990c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xg4, xh4, xg5 4000c1bc742181ded4930842b46e9507372f0b1b963James Dong 4010c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xi2, xi0, xi1 ;// (j2-j6) 4020c1bc742181ded4930842b46e9507372f0b1b963James Dong 4030c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 4040c1bc742181ded4930842b46e9507372f0b1b963James Dong 4050c1bc742181ded4930842b46e9507372f0b1b963James Dong SMULBB xi0, xi2, LoopRR2 4060c1bc742181ded4930842b46e9507372f0b1b963James Dong SMULTB xi2, xi2, LoopRR2 4070c1bc742181ded4930842b46e9507372f0b1b963James Dong 4080c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV xi2, xi2, LSL #1 4090c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 4100c1bc742181ded4930842b46e9507372f0b1b963James Dong 4110c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// xi0, xi1 now free 4120c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 4,3 rows 0to1 x 1/2 4130c1bc742181ded4930842b46e9507372f0b1b963James Dong LDRD xi0, [pDest] ;// j0, j4 scaled 4140c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xh2, xh2, xi3 4150c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows 4160c1bc742181ded4930842b46e9507372f0b1b963James Dong 4170c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xh0, xi0, xi1 4180c1bc742181ded4930842b46e9507372f0b1b963James Dong SHSUB16 xh1, xi0, xi1 4190c1bc742181ded4930842b46e9507372f0b1b963James Dong 4200c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 2 rows 0to3 x 1/2 4210c1bc742181ded4930842b46e9507372f0b1b963James Dong SHSUB16 xg2, xh1, xh2 4220c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xg1, xh1, 
xh2 4230c1bc742181ded4930842b46e9507372f0b1b963James Dong SHSUB16 xg3, xh0, xh3 4240c1bc742181ded4930842b46e9507372f0b1b963James Dong SHADD16 xg0, xh0, xh3 4250c1bc742181ded4930842b46e9507372f0b1b963James Dong 4260c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 1 all rows 4270c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xf3, xg3, xg4 4280c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xf4, xg3, xg4 4290c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xf2, xg2, xg5 4300c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xf5, xg2, xg5 4310c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xf1, xg1, xg6 4320c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xf6, xg1, xg6 4330c1bc742181ded4930842b46e9507372f0b1b963James Dong SADD16 xf0, xg0, xg7 4340c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 xf7, xg0, xg7 4350c1bc742181ded4930842b46e9507372f0b1b963James Dong 4360c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Transpose, store and loop 4370c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT ra01, xf0, xf1, LSL #16 4380c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB rb01, xf1, xf0, ASR #16 4390c1bc742181ded4930842b46e9507372f0b1b963James Dong 4400c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT ra23, xf2, xf3, LSL #16 4410c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB rb23, xf3, xf2, ASR #16 4420c1bc742181ded4930842b46e9507372f0b1b963James Dong 4430c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT ra45, xf4, xf5, LSL #16 4440c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB rb45, xf5, xf4, ASR #16 4450c1bc742181ded4930842b46e9507372f0b1b963James Dong 4460c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT ra67, xf6, xf7, LSL #16 4470c1bc742181ded4930842b46e9507372f0b1b963James Dong STMIA pDest!, {ra01, ra23, ra45, ra67} 4480c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHTB rb67, xf7, xf6, ASR #16 4490c1bc742181ded4930842b46e9507372f0b1b963James 
;// ---------------------------------------------------------------------
;// ARM1136JS (ARMv6 SIMD) path, continued.
;//
;// Contents of this span, in order:
;//   1. the last statements of the v6_idct_col$_F loop body,
;//   2. v6OddZero$_F  - column-pass variant whose stage 1 only copies
;//                      the even-half results (no odd-half add/sub),
;//   3. v6_idct_row$_F - second pass, saturate, pack and store,
;//   4. the first register-number equates of the CortexA8 path.
;//
;// Every 32-bit register holds two packed 16-bit values, so each loop
;// iteration handles two rows/columns ("done two rows").
;// LoopRR2: SMULBB/SMULTB use its bottom halfword as a multiplier and
;// ADDS #2<<29 advances a counter in its top bits; the carry out of
;// bit 31 is what the BCC loop branches test.
;// ---------------------------------------------------------------------

        ;// Tail of the v6_idct_col$_F loop body: store the second packed row
        STMIA   pDest!, {rb01, rb23, rb45, rb67}
        BCC     v6_idct_col$_F              ;// carry is from the ADDS on LoopRR2 earlier in the loop

        ;// First pass finished: rewind 64 halfwords so that the second
        ;// pass reads back what was just written through pDest
        SUB     pSrc, pDest, #(64*2)
        M_LDR   pDest, ppDest               ;// M_LDR is a macro defined elsewhere
        IF "$stride"="s"
            M_LDR   pScale, pStride         ;// stride supplied at run time
        ENDIF
        B       v6_idct_row$_F

v6OddZero$_F
        SSUB16  xi2, xi6, xi7               ;// (j2-j6)
        SHADD16 xi3, xi6, xi7               ;// (j2+j6)/2

        ;// Multiply both halfwords of xi2 by the bottom halfword of LoopRR2
        SMULBB  xi0, xi2, LoopRR2
        SMULTB  xi2, xi2, LoopRR2

        ;// LSL #1 on the top product makes its upper halfword equal to
        ;// product>>15, matching the ASR #15 applied to the bottom product
        MOV     xi2, xi2, LSL #1
        PKHTB   xh2, xi2, xi0, ASR#15       ;// i2*sqrt(2)/4
        SSUB16  xh2, xh2, xi3

        ;// xi0, xi1 now free
        ;// IStage 4,3 rows 0to1 x 1/2

        SHADD16 xh0, xi4, xi5
        SHSUB16 xh1, xi4, xi5

        ;// IStage 2 rows 0to3 x 1/2
        SHSUB16 xg2, xh1, xh2
        SHADD16 xg1, xh1, xh2
        SHSUB16 xg3, xh0, xh3
        SHADD16 xg0, xh0, xh3

        ;// IStage 1 all rows
        ;// No xg4..xg7 terms on this path: each output pair f(k), f(7-k)
        ;// is a plain copy of g(k)
        MOV     xf3, xg3
        MOV     xf4, xg3
        MOV     xf2, xg2
        MOV     xf5, xg2
        MOV     xf1, xg1
        MOV     xf6, xg1
        MOV     xf0, xg0
        MOV     xf7, xg0

        ;// Transpose
        ;// ra* collects the bottom halfwords, rb* the top halfwords
        PKHBT   ra01, xf0, xf1, LSL #16
        PKHTB   rb01, xf1, xf0, ASR #16

        PKHBT   ra23, xf2, xf3, LSL #16
        PKHTB   rb23, xf3, xf2, ASR #16

        PKHBT   ra45, xf4, xf5, LSL #16
        PKHTB   rb45, xf5, xf4, ASR #16

        PKHBT   ra67, xf6, xf7, LSL #16
        PKHTB   rb67, xf7, xf6, ASR #16

        STMIA   pDest!, {ra01, ra23, ra45, ra67}
        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
        STMIA   pDest!, {rb01, rb23, rb45, rb67}   ;// STMIA leaves the flags untouched

        BCC     v6_idct_col$_F
        SUB     pSrc, pDest, #(64*2)        ;// rewind 64 halfwords for the second pass
        M_LDR   pDest, ppDest
        IF "$stride"="s"
            M_LDR   pScale, pStride
        ENDIF
        ;// falls through into v6_idct_row$_F


v6_idct_row$_F
        ;// IStage 4,3, rows4to7 x1/4
        LDR     xit, =0x00010001            ;// rounding constant
        LDR     xi0, [pSrc, #1*16]          ;// j1
        LDR     xi1, [pSrc, #7*16]          ;// 4*j7
        LDR     xi2, [pSrc, #5*16]          ;// j5
        LDR     xi3, [pSrc, #3*16]          ;// j3

        SHADD16 xi1, xi1, xit               ;// 2*j7
        SHADD16 xi1, xi1, xit               ;// j7

        SHADD16 xi5, xi0, xi1               ;// (j1+j7)/2
        SSUB16  xi6, xi0, xi1               ;// j1-j7
        SHADD16 xi7, xi2, xi3               ;// (j5+j3)/2
        SSUB16  xi4, xi2, xi3               ;// j5-j3

        SSUB16  xi3, xi5, xi7               ;// (i5-i7)/2

        PKHBT   xi0, xi6, xi4, LSL#16       ;// [i4,i6] row a
        PKHTB   xi1, xi4, xi6, ASR#16       ;// [i4,i6] row b

        SMUADX  xi2, xi0, csPiBy8           ;// rowa by [c,s]
        SMUADX  xi4, xi1, csPiBy8           ;// rowb by [c,s]
        SMUSD   xi0, xi0, csPiBy8           ;// rowa by [-s,c]
        SMUSD   xi6, xi1, csPiBy8           ;// rowb by [-s,c]

        SMULBB  xi1, xi3, LoopRR2
        SMULTB  xi3, xi3, LoopRR2

        PKHTB   xh4, xi4, xi2, ASR#16       ;// h4/4
        PKHTB   xh6, xi6, xi0, ASR#16       ;// h6/4
        SHADD16 xh7, xi5, xi7               ;// (i5+i7)/4

        MOV     xi3, xi3, LSL #1
        PKHTB   xh5, xi3, xi1, ASR#15       ;// h5/4

        ;// xi0,xi1,xi2,xi3 now free
        ;// IStage 4,3, rows 2to3 x1/2

        LDR     xi0, [pSrc, #2*16]          ;// j2
        LDR     xi1, [pSrc, #6*16]          ;// 2*j6

        ;// IStage 2, rows4to7
        SSUB16  xg6, xh6, xh7
        SSUB16  xg5, xh5, xg6
        SSUB16  xg4, xh4, xg5

        SHADD16 xi1, xi1, xit               ;// j6
        SSUB16  xi2, xi0, xi1               ;// (j2-j6)
        SHADD16 xi3, xi0, xi1               ;// (j2+j6)/2

        SMULBB  xi0, xi2, LoopRR2
        SMULTB  xi2, xi2, LoopRR2

        MOV     xi2, xi2, LSL #1

        PKHTB   xh2, xi2, xi0, ASR#15       ;// i2*sqrt(2)/4

        ;// xi0, xi1 now free
        ;// IStage 4,3 rows 0to1 x 1/2
        LDR     xi1, [pSrc, #4*16]          ;// j4
        LDR     xi0, [pSrc], #4             ;// j0; post-increment steps to the next halfword pair

        SSUB16  xh2, xh2, xi3
        ;// The only flag-setting instruction in the loop body; the BCC
        ;// at the bottom of the loop tests this carry
        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows

        ADD     xi0, xi0, xit, LSL #2       ;// ensure correct round
        SHADD16 xh0, xi0, xi1               ;// of DC result
        SHSUB16 xh1, xi0, xi1

        ;// IStage 2 rows 0to3 x 1/2
        SHSUB16 xg2, xh1, xh2
        SHADD16 xg1, xh1, xh2
        SHSUB16 xg3, xh0, xh3
        SHADD16 xg0, xh0, xh3

        ;// IStage 1 all rows
        SHADD16 xf3, xg3, xg4
        SHSUB16 xf4, xg3, xg4
        SHADD16 xf2, xg2, xg5
        SHSUB16 xf5, xg2, xg5
        SHADD16 xf1, xg1, xg6
        SHSUB16 xf6, xg1, xg6
        SHADD16 xf0, xg0, xg7
        SHSUB16 xf7, xg0, xg7

        ;// Saturate
        ;// ("s16" output takes neither branch and is stored unsaturated)
        IF ("$outsize"="u8")
            USAT16  xf0, #8, xf0
            USAT16  xf1, #8, xf1
            USAT16  xf2, #8, xf2
            USAT16  xf3, #8, xf3
            USAT16  xf4, #8, xf4
            USAT16  xf5, #8, xf5
            USAT16  xf6, #8, xf6
            USAT16  xf7, #8, xf7
        ENDIF
        IF ("$outsize"="s9")
            SSAT16  xf0, #9, xf0
            SSAT16  xf1, #9, xf1
            SSAT16  xf2, #9, xf2
            SSAT16  xf3, #9, xf3
            SSAT16  xf4, #9, xf4
            SSAT16  xf5, #9, xf5
            SSAT16  xf6, #9, xf6
            SSAT16  xf7, #9, xf7
        ENDIF

        ;// Transpose to Row, Pack and store
        IF ("$outsize"="u8")
            ;// After USAT16 each halfword fits in 8 bits, so the odd
            ;// register can be merged into the upper byte of each halfword
            ORR     xf0, xf0, xf1, LSL #8   ;// [ b1 b0 a1 a0 ]
            ORR     xf2, xf2, xf3, LSL #8   ;// [ b3 b2 a3 a2 ]
            ORR     xf4, xf4, xf5, LSL #8   ;// [ b5 b4 a5 a4 ]
            ORR     xf6, xf6, xf7, LSL #8   ;// [ b7 b6 a7 a6 ]
            PKHBT   ra01, xf0, xf2, LSL #16
            PKHTB   rb01, xf2, xf0, ASR #16
            PKHBT   ra23, xf4, xf6, LSL #16
            PKHTB   rb23, xf6, xf4, ASR #16
            STMIA   pDest, {ra01, ra23}     ;// row a: 8 bytes
            IF "$stride"="s"
                ADD     pDest, pDest, pScale
                STMIA   pDest, {rb01, rb23} ;// row b: 8 bytes
                ADD     pDest, pDest, pScale
            ELSE
                ADD     pDest, pDest, #($stride)
                STMIA   pDest, {rb01, rb23} ;// row b: 8 bytes
                ADD     pDest, pDest, #($stride)
            ENDIF
        ENDIF
        IF ("$outsize"="s9"):LOR:("$outsize"="s16")
            PKHBT   ra01, xf0, xf1, LSL #16
            PKHTB   rb01, xf1, xf0, ASR #16

            PKHBT   ra23, xf2, xf3, LSL #16
            PKHTB   rb23, xf3, xf2, ASR #16

            PKHBT   ra45, xf4, xf5, LSL #16
            PKHTB   rb45, xf5, xf4, ASR #16

            PKHBT   ra67, xf6, xf7, LSL #16
            PKHTB   rb67, xf7, xf6, ASR #16

            STMIA   pDest, {ra01, ra23, ra45, ra67}         ;// row a: 16 bytes
            IF "$stride"="s"
                ADD     pDest, pDest, pScale
                STMIA   pDest, {rb01, rb23, rb45, rb67}     ;// row b: 16 bytes
                ADD     pDest, pDest, pScale
            ELSE
                ADD     pDest, pDest, #($stride)
                STMIA   pDest, {rb01, rb23, rb45, rb67}     ;// row b: 16 bytes
                ADD     pDest, pDest, #($stride)
            ENDIF
        ENDIF

        BCC     v6_idct_row$_F              ;// carry from the ADDS on LoopRR2 above
        ENDIF ;// ARM1136JS


        IF CortexA8

;// NEON Q-register numbers holding the eight input rows and one temporary
Src0            EQU  7
Src1            EQU  8
Src2            EQU  9
Src3            EQU  10
Src4            EQU  11
Src5            EQU  12
Src6            EQU  13
Src7            EQU  14
Tmp             EQU  15

qXj0            QN Src0.S16
qXj1            QN Src1.S16
qXj2            QN Src2.S16
;// ---------------------------------------------------------------------
;// CortexA8 (NEON) path: register alias tables and input load.
;//
;// Naming: q<X><stage><n> is a Q register viewed as 8 x S16 and
;// d<X><stage><n>lo/hi are its two D halves (D number = 2*Q and 2*Q+1).
;// The stage letters run j -> i -> h -> g -> f; every later-stage name
;// is an alias of an earlier-stage register, so the whole chain resolves
;// to the Src0..Src7/Tmp numbers declared just before this table.
;// ---------------------------------------------------------------------
qXj3            QN Src3.S16
qXj4            QN Src4.S16
qXj5            QN Src5.S16
qXj6            QN Src6.S16
qXj7            QN Src7.S16
qXjt            QN Tmp.S16

dXj0lo          DN (Src0*2).S16
dXj0hi          DN (Src0*2+1).S16
dXj1lo          DN (Src1*2).S16
dXj1hi          DN (Src1*2+1).S16
dXj2lo          DN (Src2*2).S16
dXj2hi          DN (Src2*2+1).S16
dXj3lo          DN (Src3*2).S16
dXj3hi          DN (Src3*2+1).S16
dXj4lo          DN (Src4*2).S16
dXj4hi          DN (Src4*2+1).S16
dXj5lo          DN (Src5*2).S16
dXj5hi          DN (Src5*2+1).S16
dXj6lo          DN (Src6*2).S16
dXj6hi          DN (Src6*2+1).S16
dXj7lo          DN (Src7*2).S16
dXj7hi          DN (Src7*2+1).S16
dXjtlo          DN (Tmp*2).S16
dXjthi          DN (Tmp*2+1).S16

;// Stage i names, as aliases of the stage j registers
qXi0            QN qXj0
qXi1            QN qXj4
qXi2            QN qXj2
qXi3            QN qXj7
qXi4            QN qXj5
qXi5            QN qXjt
qXi6            QN qXj1
qXi7            QN qXj6
qXit            QN qXj3

dXi0lo          DN dXj0lo
dXi0hi          DN dXj0hi
dXi1lo          DN dXj4lo
dXi1hi          DN dXj4hi
dXi2lo          DN dXj2lo
dXi2hi          DN dXj2hi
dXi3lo          DN dXj7lo
dXi3hi          DN dXj7hi
dXi4lo          DN dXj5lo
dXi4hi          DN dXj5hi
dXi5lo          DN dXjtlo
dXi5hi          DN dXjthi
dXi6lo          DN dXj1lo
dXi6hi          DN dXj1hi
dXi7lo          DN dXj6lo
dXi7hi          DN dXj6hi
dXitlo          DN dXj3lo
dXithi          DN dXj3hi

;// Stage h names, as aliases of the stage i registers
qXh0            QN qXit
qXh1            QN qXi0
qXh2            QN qXi2
qXh3            QN qXi3
qXh4            QN qXi7
qXh5            QN qXi5
qXh6            QN qXi4
qXh7            QN qXi1
qXht            QN qXi6

dXh0lo          DN dXitlo
dXh0hi          DN dXithi
dXh1lo          DN dXi0lo
dXh1hi          DN dXi0hi
dXh2lo          DN dXi2lo
dXh2hi          DN dXi2hi
dXh3lo          DN dXi3lo
dXh3hi          DN dXi3hi
dXh4lo          DN dXi7lo
dXh4hi          DN dXi7hi
dXh5lo          DN dXi5lo
dXh5hi          DN dXi5hi
dXh6lo          DN dXi4lo
dXh6hi          DN dXi4hi
dXh7lo          DN dXi1lo
dXh7hi          DN dXi1hi
dXhtlo          DN dXi6lo
dXhthi          DN dXi6hi

;// Stage g names, as aliases of the stage h registers
qXg0            QN qXh2
qXg1            QN qXht
qXg2            QN qXh1
qXg3            QN qXh0
qXg4            QN qXh4
qXg5            QN qXh5
qXg6            QN qXh6
qXg7            QN qXh7
qXgt            QN qXh3

;// Stage f names, as aliases of the stage g registers
qXf0            QN qXg6
qXf1            QN qXg5
qXf2            QN qXg4
qXf3            QN qXgt
qXf4            QN qXg3
qXf5            QN qXg2
qXf6            QN qXg1
qXf7            QN qXg0
qXft            QN qXg7


;// 32-bit scratch views of Q1..Q6. Several names share a register:
;// qXt0/qT0lo are both Q1, qXt1/qT0hi are both Q2,
;// qScalelo/qTemp0 are both Q5, qScalehi/qTemp1 are both Q6.
qXt0            QN 1.S32
qXt1            QN 2.S32
qT0lo           QN 1.S32
qT0hi           QN 2.S32
qT1lo           QN 3.S32
qT1hi           QN 4.S32
qScalelo        QN 5.S32        ;// used to read post scale values
qScalehi        QN 6.S32
qTemp0          QN 5.S32
qTemp1          QN 6.S32


;// 16-bit scale views. Scale1 is Q6 (the same register as
;// qScalehi/qTemp1) and Scale2 is Q15 (the same register number as Tmp).
Scale1          EQU 6
Scale2          EQU 15
qScale1         QN Scale1.S16
qScale2         QN Scale2.S16
dScale1lo       DN (Scale1*2).S16
dScale1hi       DN (Scale1*2+1).S16
dScale2lo       DN (Scale2*2).S16
dScale2hi       DN (Scale2*2+1).S16

;// D0 holds the constants; the braces list lanes from 3 down to 0
dCoefs          DN 0.S16        ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
InvSqrt2        DN dCoefs[0]    ;// 1/sqrt(2) in Q15
S               DN dCoefs[1]    ;// Sin(PI/8) in Q15
C               DN dCoefs[2]    ;// Cos(PI/8) in Q15

pTemp           RN 12


        IMPORT  armCOMM_IDCTCoef

        ;// Load the 8x8 block of 16-bit inputs, two rows (2 x 16 bytes)
        ;// per VLD1, with a 64-bit alignment hint and pSrc write-back
        VLD1        {qXj0,qXj1}, [pSrc @64]!
        VLD1        {qXj2,qXj3}, [pSrc @64]!
        VLD1        {qXj4,qXj5}, [pSrc @64]!
        VLD1        {qXj6,qXj7}, [pSrc @64]!
;// ---------------------------------------------------------------------
;// CortexA8 (NEON) path: prescale, 1-D IDCT butterflies (IStage 4..1)
;// on all eight lanes at once, then an 8x8 transpose of the result.
;// InvSqrt2, S and C are lanes of dCoefs holding Q15 constants.
;// ---------------------------------------------------------------------

        ;// Load PreScale and multiply with Src
        ;// IStage 4

        IF "$inscale"="s16"                         ;// 16X16 Mul
            M_IDCT_PRESCALE16
        ENDIF

        IF "$inscale"="s32"                         ;// 32X32 Mul
            M_IDCT_PRESCALE32
        ENDIF

        ;// IStage 3
        ;// VQDMULH returns the high half of the doubled product, i.e.
        ;// a Q15 multiply by the scalar
        VQDMULH     qXi2, qXi2, InvSqrt2            ;// i2/sqrt(2)
        VHADD       qXh0, qXi0, qXi1                ;// (i0+i1)/2
        VHSUB       qXh1, qXi0, qXi1                ;// (i0-i1)/2
        VHADD       qXh7, qXi5, qXi7                ;// (i5+i7)/4
        VSUB        qXh5, qXi5, qXi7                ;// (i5-i7)/2
        VQDMULH     qXh5, qXh5, InvSqrt2            ;// h5/sqrt(2)
        VSUB        qXh2, qXi2, qXi3                ;// h2, h3

        ;// Rotation by PI/8, done at 32-bit precision on each D half and
        ;// narrowed back to 16 bits by keeping the upper halves
        VMULL       qXt0, dXi4lo, C                 ;// c*i4
        VMLAL       qXt0, dXi6lo, S                 ;// c*i4+s*i6
        VMULL       qXt1, dXi4hi, C
        VMLAL       qXt1, dXi6hi, S
        VSHRN       dXh4lo, qXt0, #16               ;// h4
        VSHRN       dXh4hi, qXt1, #16

        VMULL       qXt0, dXi6lo, C                 ;// c*i6
        VMLSL       qXt0, dXi4lo, S                 ;// -s*i4 + c*i6
        VMULL       qXt1, dXi6hi, C
        VMLSL       qXt1, dXi4hi, S
        VSHRN       dXh6lo, qXt0, #16               ;// h6
        VSHRN       dXh6hi, qXt1, #16

        ;// IStage 2
        VSUB        qXg6, qXh6, qXh7
        VSUB        qXg5, qXh5, qXg6
        VSUB        qXg4, qXh4, qXg5
        VHADD       qXg1, qXh1, qXh2                ;// (h1+h2)/2
        VHSUB       qXg2, qXh1, qXh2                ;// (h1-h2)/2
        VHADD       qXg0, qXh0, qXh3                ;// (h0+h3)/2
        VHSUB       qXg3, qXh0, qXh3                ;// (h0-h3)/2

        ;// IStage 1 all rows
        VADD        qXf3, qXg3, qXg4
        VSUB        qXf4, qXg3, qXg4
        VADD        qXf2, qXg2, qXg5
        VSUB        qXf5, qXg2, qXg5
        VADD        qXf1, qXg1, qXg6
        VSUB        qXf6, qXg1, qXg6
        VADD        qXf0, qXg0, qXg7
        VSUB        qXf7, qXg0, qXg7

        ;// Transpose, store and loop
;// XTRn is the Q-register number that qXfn resolves to through the
;// f -> g -> h -> i -> j alias chain (for example qXf0 is Src5).
XTR0            EQU Src5
XTR1            EQU Tmp
XTR2            EQU Src6
XTR3            EQU Src7
XTR4            EQU Src3
XTR5            EQU Src0
XTR6            EQU Src1
XTR7            EQU Src2
XTRt            EQU Src4

qA0             QN  XTR0.S32                        ;// for transpose
qA1             QN  XTR1.S32
qA2             QN  XTR2.S32
qA3             QN  XTR3.S32
qA4             QN  XTR4.S32
qA5             QN  XTR5.S32
qA6             QN  XTR6.S32
qA7             QN  XTR7.S32

;// dB0..dB3 are the high D halves of rows 0..3 and dB4..dB7 are the
;// low D halves of rows 4..7
dB0             DN  XTR0*2+1                        ;// for using VSWP
dB1             DN  XTR1*2+1
dB2             DN  XTR2*2+1
dB3             DN  XTR3*2+1
dB4             DN  XTR4*2
dB5             DN  XTR5*2
dB6             DN  XTR6*2
dB7             DN  XTR7*2


        ;// 8x8 transpose in three steps: 16-bit element pairs,
        ;// then 32-bit pairs, then swap the 64-bit quadrants
        VTRN        qXf0, qXf1
        VTRN        qXf2, qXf3
        VTRN        qXf4, qXf5
        VTRN        qXf6, qXf7
        VTRN        qA0, qA2
        VTRN        qA1, qA3
        VTRN        qA4, qA6
        VTRN        qA5, qA7
        VSWP        dB0, dB4
        VSWP        dB1, dB5
        VSWP        dB2, dB6
        VSWP        dB3, dB7


;// Names for the next stage, which reads the transposed data in place
qYj0            QN qXf0
qYj1            QN qXf1
qYj2            QN qXf2
qYj3            QN qXf3
qYj4            QN qXf4
qYj5            QN qXf5
qYj6            QN qXf6
qYj7            QN qXf7
qYjt            QN qXft

dYj0lo          DN (XTR0*2).S16
dYj0hi          DN (XTR0*2+1).S16
dYj1lo          DN (XTR1*2).S16
9410c1bc742181ded4930842b46e9507372f0b1b963James DongdYj1hi DN (XTR1*2+1).S16 9420c1bc742181ded4930842b46e9507372f0b1b963James DongdYj2lo DN (XTR2*2).S16 9430c1bc742181ded4930842b46e9507372f0b1b963James DongdYj2hi DN (XTR2*2+1).S16 9440c1bc742181ded4930842b46e9507372f0b1b963James DongdYj3lo DN (XTR3*2).S16 9450c1bc742181ded4930842b46e9507372f0b1b963James DongdYj3hi DN (XTR3*2+1).S16 9460c1bc742181ded4930842b46e9507372f0b1b963James DongdYj4lo DN (XTR4*2).S16 9470c1bc742181ded4930842b46e9507372f0b1b963James DongdYj4hi DN (XTR4*2+1).S16 9480c1bc742181ded4930842b46e9507372f0b1b963James DongdYj5lo DN (XTR5*2).S16 9490c1bc742181ded4930842b46e9507372f0b1b963James DongdYj5hi DN (XTR5*2+1).S16 9500c1bc742181ded4930842b46e9507372f0b1b963James DongdYj6lo DN (XTR6*2).S16 9510c1bc742181ded4930842b46e9507372f0b1b963James DongdYj6hi DN (XTR6*2+1).S16 9520c1bc742181ded4930842b46e9507372f0b1b963James DongdYj7lo DN (XTR7*2).S16 9530c1bc742181ded4930842b46e9507372f0b1b963James DongdYj7hi DN (XTR7*2+1).S16 9540c1bc742181ded4930842b46e9507372f0b1b963James DongdYjtlo DN (XTRt*2).S16 9550c1bc742181ded4930842b46e9507372f0b1b963James DongdYjthi DN (XTRt*2+1).S16 9560c1bc742181ded4930842b46e9507372f0b1b963James Dong 9570c1bc742181ded4930842b46e9507372f0b1b963James DongqYi0 QN qYj0 9580c1bc742181ded4930842b46e9507372f0b1b963James DongqYi1 QN qYj4 9590c1bc742181ded4930842b46e9507372f0b1b963James DongqYi2 QN qYj2 9600c1bc742181ded4930842b46e9507372f0b1b963James DongqYi3 QN qYj7 9610c1bc742181ded4930842b46e9507372f0b1b963James DongqYi4 QN qYj5 9620c1bc742181ded4930842b46e9507372f0b1b963James DongqYi5 QN qYjt 9630c1bc742181ded4930842b46e9507372f0b1b963James DongqYi6 QN qYj1 9640c1bc742181ded4930842b46e9507372f0b1b963James DongqYi7 QN qYj6 9650c1bc742181ded4930842b46e9507372f0b1b963James DongqYit QN qYj3 9660c1bc742181ded4930842b46e9507372f0b1b963James Dong 9670c1bc742181ded4930842b46e9507372f0b1b963James DongdYi0lo DN dYj0lo 9680c1bc742181ded4930842b46e9507372f0b1b963James DongdYi0hi DN dYj0hi 
9690c1bc742181ded4930842b46e9507372f0b1b963James DongdYi1lo DN dYj4lo 9700c1bc742181ded4930842b46e9507372f0b1b963James DongdYi1hi DN dYj4hi 9710c1bc742181ded4930842b46e9507372f0b1b963James DongdYi2lo DN dYj2lo 9720c1bc742181ded4930842b46e9507372f0b1b963James DongdYi2hi DN dYj2hi 9730c1bc742181ded4930842b46e9507372f0b1b963James DongdYi3lo DN dYj7lo 9740c1bc742181ded4930842b46e9507372f0b1b963James DongdYi3hi DN dYj7hi 9750c1bc742181ded4930842b46e9507372f0b1b963James DongdYi4lo DN dYj5lo 9760c1bc742181ded4930842b46e9507372f0b1b963James DongdYi4hi DN dYj5hi 9770c1bc742181ded4930842b46e9507372f0b1b963James DongdYi5lo DN dYjtlo 9780c1bc742181ded4930842b46e9507372f0b1b963James DongdYi5hi DN dYjthi 9790c1bc742181ded4930842b46e9507372f0b1b963James DongdYi6lo DN dYj1lo 9800c1bc742181ded4930842b46e9507372f0b1b963James DongdYi6hi DN dYj1hi 9810c1bc742181ded4930842b46e9507372f0b1b963James DongdYi7lo DN dYj6lo 9820c1bc742181ded4930842b46e9507372f0b1b963James DongdYi7hi DN dYj6hi 9830c1bc742181ded4930842b46e9507372f0b1b963James DongdYitlo DN dYj3lo 9840c1bc742181ded4930842b46e9507372f0b1b963James DongdYithi DN dYj3hi 9850c1bc742181ded4930842b46e9507372f0b1b963James Dong 9860c1bc742181ded4930842b46e9507372f0b1b963James DongqYh0 QN qYit 9870c1bc742181ded4930842b46e9507372f0b1b963James DongqYh1 QN qYi0 9880c1bc742181ded4930842b46e9507372f0b1b963James DongqYh2 QN qYi2 9890c1bc742181ded4930842b46e9507372f0b1b963James DongqYh3 QN qYi3 9900c1bc742181ded4930842b46e9507372f0b1b963James DongqYh4 QN qYi7 9910c1bc742181ded4930842b46e9507372f0b1b963James DongqYh5 QN qYi5 9920c1bc742181ded4930842b46e9507372f0b1b963James DongqYh6 QN qYi4 9930c1bc742181ded4930842b46e9507372f0b1b963James DongqYh7 QN qYi1 9940c1bc742181ded4930842b46e9507372f0b1b963James DongqYht QN qYi6 9950c1bc742181ded4930842b46e9507372f0b1b963James Dong 9960c1bc742181ded4930842b46e9507372f0b1b963James DongdYh0lo DN dYitlo 9970c1bc742181ded4930842b46e9507372f0b1b963James DongdYh0hi DN dYithi 
9980c1bc742181ded4930842b46e9507372f0b1b963James DongdYh1lo DN dYi0lo 9990c1bc742181ded4930842b46e9507372f0b1b963James DongdYh1hi DN dYi0hi 10000c1bc742181ded4930842b46e9507372f0b1b963James DongdYh2lo DN dYi2lo 10010c1bc742181ded4930842b46e9507372f0b1b963James DongdYh2hi DN dYi2hi 10020c1bc742181ded4930842b46e9507372f0b1b963James DongdYh3lo DN dYi3lo 10030c1bc742181ded4930842b46e9507372f0b1b963James DongdYh3hi DN dYi3hi 10040c1bc742181ded4930842b46e9507372f0b1b963James DongdYh4lo DN dYi7lo 10050c1bc742181ded4930842b46e9507372f0b1b963James DongdYh4hi DN dYi7hi 10060c1bc742181ded4930842b46e9507372f0b1b963James DongdYh5lo DN dYi5lo 10070c1bc742181ded4930842b46e9507372f0b1b963James DongdYh5hi DN dYi5hi 10080c1bc742181ded4930842b46e9507372f0b1b963James DongdYh6lo DN dYi4lo 10090c1bc742181ded4930842b46e9507372f0b1b963James DongdYh6hi DN dYi4hi 10100c1bc742181ded4930842b46e9507372f0b1b963James DongdYh7lo DN dYi1lo 10110c1bc742181ded4930842b46e9507372f0b1b963James DongdYh7hi DN dYi1hi 10120c1bc742181ded4930842b46e9507372f0b1b963James DongdYhtlo DN dYi6lo 10130c1bc742181ded4930842b46e9507372f0b1b963James DongdYhthi DN dYi6hi 10140c1bc742181ded4930842b46e9507372f0b1b963James Dong 10150c1bc742181ded4930842b46e9507372f0b1b963James DongqYg0 QN qYh2 10160c1bc742181ded4930842b46e9507372f0b1b963James DongqYg1 QN qYht 10170c1bc742181ded4930842b46e9507372f0b1b963James DongqYg2 QN qYh1 10180c1bc742181ded4930842b46e9507372f0b1b963James DongqYg3 QN qYh0 10190c1bc742181ded4930842b46e9507372f0b1b963James DongqYg4 QN qYh4 10200c1bc742181ded4930842b46e9507372f0b1b963James DongqYg5 QN qYh5 10210c1bc742181ded4930842b46e9507372f0b1b963James DongqYg6 QN qYh6 10220c1bc742181ded4930842b46e9507372f0b1b963James DongqYg7 QN qYh7 10230c1bc742181ded4930842b46e9507372f0b1b963James DongqYgt QN qYh3 10240c1bc742181ded4930842b46e9507372f0b1b963James Dong 10250c1bc742181ded4930842b46e9507372f0b1b963James DongqYf0 QN qYg6 10260c1bc742181ded4930842b46e9507372f0b1b963James DongqYf1 QN qYg5 
10270c1bc742181ded4930842b46e9507372f0b1b963James DongqYf2 QN qYg4 10280c1bc742181ded4930842b46e9507372f0b1b963James DongqYf3 QN qYgt 10290c1bc742181ded4930842b46e9507372f0b1b963James DongqYf4 QN qYg3 10300c1bc742181ded4930842b46e9507372f0b1b963James DongqYf5 QN qYg2 10310c1bc742181ded4930842b46e9507372f0b1b963James DongqYf6 QN qYg1 10320c1bc742181ded4930842b46e9507372f0b1b963James DongqYf7 QN qYg0 10330c1bc742181ded4930842b46e9507372f0b1b963James DongqYft QN qYg7 10340c1bc742181ded4930842b46e9507372f0b1b963James Dong 10350c1bc742181ded4930842b46e9507372f0b1b963James Dong VRSHR qYj7, qYj7, #2 10360c1bc742181ded4930842b46e9507372f0b1b963James Dong VRSHR qYj6, qYj6, #1 10370c1bc742181ded4930842b46e9507372f0b1b963James Dong 10380c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2 10390c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7 10400c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2 10410c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6 10420c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2 10430c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3 10440c1bc742181ded4930842b46e9507372f0b1b963James Dong 10450c1bc742181ded4930842b46e9507372f0b1b963James Dong VQDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2) 10460c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 4,3 rows 0to1 x 1/2 10470c1bc742181ded4930842b46e9507372f0b1b963James Dong 10480c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV pTemp, #0x4 ;// ensure correct round 10490c1bc742181ded4930842b46e9507372f0b1b963James Dong VDUP qScale1, pTemp ;// of DC result 10500c1bc742181ded4930842b46e9507372f0b1b963James Dong VADD qYi0, qYi0, qScale1 10510c1bc742181ded4930842b46e9507372f0b1b963James Dong 10520c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYh0, qYi0, qYi1 ;// 
(i0+i1)/2 10530c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2 10540c1bc742181ded4930842b46e9507372f0b1b963James Dong 10550c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4 10560c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2 10570c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYh2, qYi2, qYi3 ;// h2, h3 10580c1bc742181ded4930842b46e9507372f0b1b963James Dong VQDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2) 10590c1bc742181ded4930842b46e9507372f0b1b963James Dong 10600c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt0, dYi4lo, C ;// c*i4 10610c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6 10620c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt1, dYi4hi, C 10630c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLAL qXt1, dYi6hi, S 10640c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh4lo, qXt0, #16 ;// h4 10650c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh4hi, qXt1, #16 10660c1bc742181ded4930842b46e9507372f0b1b963James Dong 10670c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt0, dYi6lo, C ;// c*i6 10680c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*h6 10690c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qXt1, dYi6hi, C 10700c1bc742181ded4930842b46e9507372f0b1b963James Dong VMLSL qXt1, dYi4hi, S 10710c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh6lo, qXt0, #16 ;// h6 10720c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHRN dYh6hi, qXt1, #16 10730c1bc742181ded4930842b46e9507372f0b1b963James Dong 10740c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYg6, qYh6, qYh7 10750c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYg5, qYh5, qYg6 10760c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qYg4, qYh4, qYg5 10770c1bc742181ded4930842b46e9507372f0b1b963James Dong 
10780c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 2 rows 0to3 x 1/2 10790c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2 10800c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2 10810c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2 10820c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2 10830c1bc742181ded4930842b46e9507372f0b1b963James Dong 10840c1bc742181ded4930842b46e9507372f0b1b963James Dong 10850c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// IStage 1 all rows 10860c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf3, qYg3, qYg4 10870c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf4, qYg3, qYg4 10880c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf2, qYg2, qYg5 10890c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf5, qYg2, qYg5 10900c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf1, qYg1, qYg6 10910c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf6, qYg1, qYg6 10920c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qYf0, qYg0, qYg7 10930c1bc742181ded4930842b46e9507372f0b1b963James Dong VHSUB qYf7, qYg0, qYg7 10940c1bc742181ded4930842b46e9507372f0b1b963James Dong 10950c1bc742181ded4930842b46e9507372f0b1b963James DongYTR0 EQU Src0 10960c1bc742181ded4930842b46e9507372f0b1b963James DongYTR1 EQU Src4 10970c1bc742181ded4930842b46e9507372f0b1b963James DongYTR2 EQU Src1 10980c1bc742181ded4930842b46e9507372f0b1b963James DongYTR3 EQU Src2 10990c1bc742181ded4930842b46e9507372f0b1b963James DongYTR4 EQU Src7 11000c1bc742181ded4930842b46e9507372f0b1b963James DongYTR5 EQU Src5 11010c1bc742181ded4930842b46e9507372f0b1b963James DongYTR6 EQU Tmp 11020c1bc742181ded4930842b46e9507372f0b1b963James DongYTR7 EQU Src6 11030c1bc742181ded4930842b46e9507372f0b1b963James DongYTRt EQU Src3 11040c1bc742181ded4930842b46e9507372f0b1b963James Dong 
11050c1bc742181ded4930842b46e9507372f0b1b963James DongqC0 QN YTR0.S32 ;// for YTRpose 11060c1bc742181ded4930842b46e9507372f0b1b963James DongqC1 QN YTR1.S32 11070c1bc742181ded4930842b46e9507372f0b1b963James DongqC2 QN YTR2.S32 11080c1bc742181ded4930842b46e9507372f0b1b963James DongqC3 QN YTR3.S32 11090c1bc742181ded4930842b46e9507372f0b1b963James DongqC4 QN YTR4.S32 11100c1bc742181ded4930842b46e9507372f0b1b963James DongqC5 QN YTR5.S32 11110c1bc742181ded4930842b46e9507372f0b1b963James DongqC6 QN YTR6.S32 11120c1bc742181ded4930842b46e9507372f0b1b963James DongqC7 QN YTR7.S32 11130c1bc742181ded4930842b46e9507372f0b1b963James Dong 11140c1bc742181ded4930842b46e9507372f0b1b963James DongdD0 DN YTR0*2+1 ;// for using VSWP 11150c1bc742181ded4930842b46e9507372f0b1b963James DongdD1 DN YTR1*2+1 11160c1bc742181ded4930842b46e9507372f0b1b963James DongdD2 DN YTR2*2+1 11170c1bc742181ded4930842b46e9507372f0b1b963James DongdD3 DN YTR3*2+1 11180c1bc742181ded4930842b46e9507372f0b1b963James DongdD4 DN YTR4*2 11190c1bc742181ded4930842b46e9507372f0b1b963James DongdD5 DN YTR5*2 11200c1bc742181ded4930842b46e9507372f0b1b963James DongdD6 DN YTR6*2 11210c1bc742181ded4930842b46e9507372f0b1b963James DongdD7 DN YTR7*2 11220c1bc742181ded4930842b46e9507372f0b1b963James Dong 11230c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf0, qYf1 11240c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf2, qYf3 11250c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf4, qYf5 11260c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qYf6, qYf7 11270c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC0, qC2 11280c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC1, qC3 11290c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC4, qC6 11300c1bc742181ded4930842b46e9507372f0b1b963James Dong VTRN qC5, qC7 11310c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD0, dD4 11320c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD1, dD5 
11330c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD2, dD6 11340c1bc742181ded4930842b46e9507372f0b1b963James Dong VSWP dD3, dD7 11350c1bc742181ded4930842b46e9507372f0b1b963James Dong 11360c1bc742181ded4930842b46e9507372f0b1b963James Dong 11370c1bc742181ded4930842b46e9507372f0b1b963James DongdYf0U8 DN YTR0*2.U8 11380c1bc742181ded4930842b46e9507372f0b1b963James DongdYf1U8 DN YTR1*2.U8 11390c1bc742181ded4930842b46e9507372f0b1b963James DongdYf2U8 DN YTR2*2.U8 11400c1bc742181ded4930842b46e9507372f0b1b963James DongdYf3U8 DN YTR3*2.U8 11410c1bc742181ded4930842b46e9507372f0b1b963James DongdYf4U8 DN YTR4*2.U8 11420c1bc742181ded4930842b46e9507372f0b1b963James DongdYf5U8 DN YTR5*2.U8 11430c1bc742181ded4930842b46e9507372f0b1b963James DongdYf6U8 DN YTR6*2.U8 11440c1bc742181ded4930842b46e9507372f0b1b963James DongdYf7U8 DN YTR7*2.U8 11450c1bc742181ded4930842b46e9507372f0b1b963James Dong 11460c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 11470c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Do saturation if outsize is other than S16 11480c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 11490c1bc742181ded4930842b46e9507372f0b1b963James Dong 11500c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ("$outsize"="u8") 11510c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output range [0-255] 11520c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf0U8, qYf0 11530c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf1U8, qYf1 11540c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf2U8, qYf2 11550c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf3U8, qYf3 11560c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf4U8, qYf4 11570c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf5U8, qYf5 11580c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf6U8, qYf6 11590c1bc742181ded4930842b46e9507372f0b1b963James Dong VQMOVN dYf7U8, qYf7 11600c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 
11610c1bc742181ded4930842b46e9507372f0b1b963James Dong 11620c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ("$outsize"="s9") 11630c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output range [-256 to +255] 11640c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf0, qYf0, #16-9 11650c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf1, qYf1, #16-9 11660c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf2, qYf2, #16-9 11670c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf3, qYf3, #16-9 11680c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf4, qYf4, #16-9 11690c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf5, qYf5, #16-9 11700c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf6, qYf6, #16-9 11710c1bc742181ded4930842b46e9507372f0b1b963James Dong VQSHL qYf7, qYf7, #16-9 11720c1bc742181ded4930842b46e9507372f0b1b963James Dong 11730c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf0, qYf0, #16-9 11740c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf1, qYf1, #16-9 11750c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf2, qYf2, #16-9 11760c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf3, qYf3, #16-9 11770c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf4, qYf4, #16-9 11780c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf5, qYf5, #16-9 11790c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf6, qYf6, #16-9 11800c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHR qYf7, qYf7, #16-9 11810c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 11820c1bc742181ded4930842b46e9507372f0b1b963James Dong 11830c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Store output depending on the Stride size 11840c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$stride"="s" 11850c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf0, [pDest @64], Stride 11860c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf1, [pDest @64], Stride 
11870c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf2, [pDest @64], Stride 11880c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf3, [pDest @64], Stride 11890c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf4, [pDest @64], Stride 11900c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf5, [pDest @64], Stride 11910c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf6, [pDest @64], Stride 11920c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf7, [pDest @64] 11930c1bc742181ded4930842b46e9507372f0b1b963James Dong ELSE 11940c1bc742181ded4930842b46e9507372f0b1b963James Dong IF ("$outsize"="u8") 11950c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf0U8, [pDest @64], #8 11960c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf1U8, [pDest @64], #8 11970c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf2U8, [pDest @64], #8 11980c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf3U8, [pDest @64], #8 11990c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf4U8, [pDest @64], #8 12000c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf5U8, [pDest @64], #8 12010c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf6U8, [pDest @64], #8 12020c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 dYf7U8, [pDest @64] 12030c1bc742181ded4930842b46e9507372f0b1b963James Dong ELSE 12040c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// ("$outsize"="s9") or ("$outsize"="s16") 12050c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf0, [pDest @64], #16 12060c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf1, [pDest @64], #16 12070c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf2, [pDest @64], #16 12080c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf3, [pDest @64], #16 12090c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf4, [pDest @64], #16 12100c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf5, [pDest @64], #16 
12110c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf6, [pDest @64], #16 12120c1bc742181ded4930842b46e9507372f0b1b963James Dong VST1 qYf7, [pDest @64] 12130c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 12140c1bc742181ded4930842b46e9507372f0b1b963James Dong 12150c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 12160c1bc742181ded4930842b46e9507372f0b1b963James Dong 12170c1bc742181ded4930842b46e9507372f0b1b963James Dong 12180c1bc742181ded4930842b46e9507372f0b1b963James Dong 12190c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF ;// CortexA8 12200c1bc742181ded4930842b46e9507372f0b1b963James Dong 12210c1bc742181ded4930842b46e9507372f0b1b963James Dong 12220c1bc742181ded4930842b46e9507372f0b1b963James Dong 12230c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 12240c1bc742181ded4930842b46e9507372f0b1b963James Dong 12250c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale TWO input rows with TWO rows of 16 bit scale values 12260c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows of 12280c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// input (eight S16 values each) with two rows of scale values. It also 12290c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// loads the next two rows of scale values from pScale when $LastRow is "0". 
12300c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12310c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Registers: 12320c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12330c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAlo - Input D register with first four S16 values of row n 12340c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAhi - Input D register with next four S16 values of row n 12350c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBlo - Input D register with first four S16 values of row n+1 12360c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBhi - Input D register with next four S16 values of row n+1 12370c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to next row of scale values (read only when $LastRow is "0") 12380c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT0lo - Temporary scratch register (product of $dAlo and dScale1lo) 12390c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT0hi - Temporary scratch register (product of $dAhi and dScale1hi) 12400c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT1lo - Temporary scratch register (product of $dBlo and dScale2lo) 12410c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qT1hi - Temporary scratch register (product of $dBhi and dScale2hi) 12420c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale1lo - Scale values for the first four values of row n 12430c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale1hi - Scale values for the next four values of row n 12440c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale2lo - Scale values for the first four values of row n+1 12450c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dScale2hi - Scale values for the next four values of row n+1 12460c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12470c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Flag 12480c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12490c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $LastRow - "0" if more rows follow (next scale rows are loaded); otherwise the load is skipped 12500c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12510c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output Registers: 
12520c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12530c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAlo - Scaled output values (first four S16 of row n) 12540c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dAhi - Scaled output values (next four S16 of row n) 12550c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBlo - Scaled output values (first four S16 of row n+1) 12560c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// $dBhi - Scaled output values (next four S16 of row n+1) 12570c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qScale1 - Scale values for next row (updated only when $LastRow is "0") 12580c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qScale2 - Scale values for next row+1 (updated only when $LastRow is "0") 12590c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to next row of scale values (advanced by 32 bytes when $LastRow is "0") 12600c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12610c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 12620c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow 12630c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT0lo, $dAlo, dScale1lo ;// row n, first four values * scale (long product) 12640c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT0hi, $dAhi, dScale1hi ;// row n, next four values * scale 12650c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT1lo, $dBlo, dScale2lo ;// row n+1, first four values * scale 12660c1bc742181ded4930842b46e9507372f0b1b963James Dong VMULL qT1hi, $dBhi, dScale2hi ;// row n+1, next four values * scale 12670c1bc742181ded4930842b46e9507372f0b1b963James Dong IF "$LastRow"="0" 12680c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale1, [pScale], #16 ;// Load scale for next row (n+2) 12690c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale2, [pScale], #16 ;// Load scale for next row+1 (n+3) 12700c1bc742181ded4930842b46e9507372f0b1b963James Dong ENDIF 12710c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dAlo, qT0lo, #12 ;// saturating, rounding narrow of product >> 12 12720c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dAhi, qT0hi, #12 12730c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dBlo, qT1lo, #12 
12740c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRSHRN $dBhi, qT1hi, #12 12750c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 12760c1bc742181ded4930842b46e9507372f0b1b963James Dong 12770c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale 8x8 block input values with 16 bit scale values 12780c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12790c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This macro is used to pre-scale a block of 8x8 input values. 12800c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// It also does the first stage transformations of the IDCT. 12810c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12820c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Registers: 12830c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12840c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXjnlo - n th input D register with first four S16 values 12850c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXjnhi - n th input D register with next four S16 values 12860c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qXjn - n th input Q register with eight S16 values 12870c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to scale values (eight rows are read, 16 bytes per row) 12880c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12890c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output Registers: 12900c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 12910c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qXin - n th output Q register with eight S16 output values of 1st stage (n = 2..7 are written here) 12920c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pSrc, dCoefs - pSrc is set to armCOMM_IDCTCoef and dCoefs is loaded from it 12930c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 12940c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_PRESCALE16 12950c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0 12960c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1 
12970c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1 12980c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3 12990c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5 13000c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7 (last pair, no further scale load) 13010c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2 13020c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qXi6, qXj1, qXj7 ;// j1-j7 13030c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants 13040c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2 13050c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qXi2, qXj2, qXj6 ;// j2-j6 13060c1bc742181ded4930842b46e9507372f0b1b963James Dong VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants 13070c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2 13080c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qXi4, qXj5, qXj3 ;// j5-j3 13090c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 13100c1bc742181ded4930842b46e9507372f0b1b963James Dong 13110c1bc742181ded4930842b46e9507372f0b1b963James Dong 13120c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Scale 8x8 block input values with 32 bit scale values 13130c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 13140c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This macro is used to pre-scale block of 8x8 input. 13150c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// This also do the Ist stage transformations of IDCT. 
13160c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 13170c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Input Registers: 13180c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 13190c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXjnlo - n th input D register with first four S16 values 13200c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXjnhi - n th input D register with next four S16 values 13210c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// qXjn - n th input Q register with eight S16 values 13220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pScale - Pointer to 32bit scale values in Q23 format; left advanced by four 32 byte rows (the row 0-3 loads use writeback) 13230c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pSrc - Row 4 (qXj4) is reloaded from pSrc-64; pSrc is set up outside this macro 13240c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Output Registers: 13250c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// 13260c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXinlo - n th output D register with first four S16 output values of 1st stage 13270c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// dXinhi - n th output D register with next four S16 output values of 1st stage 13280c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Also sets pSrc/dCoefs from armCOMM_IDCTCoef and overwrites pTemp. Register overlay (see the QN definitions): scale rows 2-4 share the row 1 registers and rows 5-7 share the row 0 registers; source rows 2,3,4,7 share the row 0 registers and rows 5,6 share the row 1 registers; qSrc1hi is mapped to the register numbered Src4 (defined outside this macro); the qRes* temporaries share the row 0 scale registers. The instruction order below depends on this overlay. 13290c1bc742181ded4930842b46e9507372f0b1b963James Dong MACRO 13300c1bc742181ded4930842b46e9507372f0b1b963James Dong M_IDCT_PRESCALE32 13310c1bc742181ded4930842b46e9507372f0b1b963James DongqScale0lo QN 0.S32 ;// Row 0 scale lives in Q0/Q1 13320c1bc742181ded4930842b46e9507372f0b1b963James DongqScale0hi QN 1.S32 13330c1bc742181ded4930842b46e9507372f0b1b963James DongqScale1lo QN 2.S32 ;// Row 1 scale lives in Q2/Q3 13340c1bc742181ded4930842b46e9507372f0b1b963James DongqScale1hi QN 3.S32 13350c1bc742181ded4930842b46e9507372f0b1b963James DongqScale2lo QN qScale1lo ;// Rows 2, 3 and 4 reuse the row 1 scale registers 13360c1bc742181ded4930842b46e9507372f0b1b963James DongqScale2hi QN qScale1hi 13370c1bc742181ded4930842b46e9507372f0b1b963James DongqScale3lo QN qScale1lo 13380c1bc742181ded4930842b46e9507372f0b1b963James DongqScale3hi QN qScale1hi 13390c1bc742181ded4930842b46e9507372f0b1b963James DongqScale4lo QN qScale1lo
13400c1bc742181ded4930842b46e9507372f0b1b963James DongqScale4hi QN qScale1hi 13410c1bc742181ded4930842b46e9507372f0b1b963James DongqScale5lo QN qScale0lo 13420c1bc742181ded4930842b46e9507372f0b1b963James DongqScale5hi QN qScale0hi 13430c1bc742181ded4930842b46e9507372f0b1b963James DongqScale6lo QN qScale0lo 13440c1bc742181ded4930842b46e9507372f0b1b963James DongqScale6hi QN qScale0hi 13450c1bc742181ded4930842b46e9507372f0b1b963James DongqScale7lo QN qScale0lo 13460c1bc742181ded4930842b46e9507372f0b1b963James DongqScale7hi QN qScale0hi 13470c1bc742181ded4930842b46e9507372f0b1b963James Dong 13480c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc0lo QN 4.S32 13490c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc0hi QN 5.S32 13500c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc1lo QN 6.S32 13510c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc1hi QN Src4.S32 13520c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc2lo QN qSrc0lo 13530c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc2hi QN qSrc0hi 13540c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc3lo QN qSrc0lo 13550c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc3hi QN qSrc0hi 13560c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc4lo QN qSrc0lo 13570c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc4hi QN qSrc0hi 13580c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc5lo QN qSrc1lo 13590c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc5hi QN qSrc1hi 13600c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc6lo QN qSrc1lo 13610c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc6hi QN qSrc1hi 13620c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc7lo QN qSrc0lo 13630c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc7hi QN qSrc0hi 13640c1bc742181ded4930842b46e9507372f0b1b963James Dong 13650c1bc742181ded4930842b46e9507372f0b1b963James DongqRes17lo QN qScale0lo 13660c1bc742181ded4930842b46e9507372f0b1b963James DongqRes17hi QN qScale0hi ;// qRes* butterfly temporaries overlay the row 0 scale registers
13670c1bc742181ded4930842b46e9507372f0b1b963James DongqRes26lo QN qScale0lo 13680c1bc742181ded4930842b46e9507372f0b1b963James DongqRes26hi QN qScale0hi 13690c1bc742181ded4930842b46e9507372f0b1b963James DongqRes53lo QN qScale0lo 13700c1bc742181ded4930842b46e9507372f0b1b963James DongqRes53hi QN qScale0hi 13710c1bc742181ded4930842b46e9507372f0b1b963James Dong 13720c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7]: 7 rows * 8 values * 4 bytes 13730c1bc742181ded4930842b46e9507372f0b1b963James Dong 13740c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Row 0 13750c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale0lo, qScale0hi}, [pScale]! ;// Load row 0 scale (two Q registers); writeback moves pScale to row 1 13760c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc0lo, dXj0lo, #(12-1) ;// Widen j0 from S16 to S32 with a left shift of 11 13770c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc0hi, dXj0hi, #(12-1) 13780c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale1lo, qScale1hi}, [pScale]! ;// Load row 1 scale; pScale moves to row 2 13790c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc0lo, qScale0lo, qSrc0lo ;// Scale row 0: saturating rounding doubling multiply, high half kept 13800c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc0hi, qScale0hi, qSrc0hi 13810c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale7lo, qScale7hi}, [pTemp]! ;// Load row 7 scale into the row 0 scale registers, which the multiplies above have finished with
13820c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc1lo, dXj1lo, #(12-1) 13830c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc1hi, dXj1hi, #(12-1) 13840c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi0lo, qSrc0lo ;// Output i0 13850c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi0hi, qSrc0hi ;// Narrow S32 back to S16 13860c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc7lo, dXj7lo, #(12-1) ;// j7 reuses the row 0 source registers, free now that i0 has been narrowed out 13870c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc7hi, dXj7hi, #(12-1) 13880c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pTemp, pTemp, #((16*2)+(4*8*1)) ;// Undo the 32 byte writeback and step back one more row: pTemp = pScale[6] 13890c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc1lo, qScale1lo, qSrc1lo ;// Scale row 1 13900c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc1hi, qScale1hi, qSrc1hi 13910c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc7lo, qScale7lo, qSrc7lo ;// Scale row 7 13920c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc7hi, qScale7hi, qSrc7hi 13930c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale2lo, qScale2hi}, [pScale]! ;// Load row 2 scale into the row 1 scale registers; pScale moves to row 3
13940c1bc742181ded4930842b46e9507372f0b1b963James Dong 13950c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Row 1 & 7 13960c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2 13970c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2 13980c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi5lo, qRes17lo ;// Output i5 13990c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi5hi, qRes17hi 14000c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7 14010c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7 14020c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi6lo, qRes17lo ;// Output i6 14030c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi6hi, qRes17hi 14040c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc2lo, dXj2lo, #(12-1) ;// j2 reuses the registers that held scaled j7 14050c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc2hi, dXj2hi, #(12-1) 14060c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale6lo, qScale6hi}, [pTemp]! ;// Load row 6 scale into the registers qRes17 occupied until i6 was narrowed out 14070c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc6lo, dXj6lo, #(12-1) ;// j6 reuses the registers that held scaled j1 14080c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc6hi, dXj6hi, #(12-1) 14090c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pTemp, pTemp, #((16*2)+(4*8*1)) ;// Step back again: pTemp = pScale[5] 14100c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc2lo, qScale2lo, qSrc2lo ;// Scale row 2 14110c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc2hi, qScale2hi, qSrc2hi 14120c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc6lo, qScale6lo, qSrc6lo ;// Scale row 6 14130c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc6hi, qScale6hi, qSrc6hi 14140c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale3lo, qScale3hi}, [pScale]! ;// Load row 3 scale; pScale moves to row 4
14150c1bc742181ded4930842b46e9507372f0b1b963James Dong 14160c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Row 2 & 6 14170c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2 14180c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2 14190c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi3lo, qRes26lo ;// Output i3 14200c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi3hi, qRes26hi 14210c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6 14220c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6 14230c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi2lo, qRes26lo ;// Output i2 14240c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi2hi, qRes26hi 14250c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc3lo, dXj3lo, #(12-1) ;// j3 reuses the registers that held scaled j2 14260c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc3hi, dXj3hi, #(12-1) 14270c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale5lo, qScale5hi}, [pTemp]! ;// Load row 5 scale; pTemp is not read again in this macro
14280c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc5lo, dXj5lo, #(12-1) 14290c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc5hi, dXj5hi, #(12-1) 14300c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc3lo, qScale3lo, qSrc3lo 14310c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc3hi, qScale3hi, qSrc3hi 14320c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc5lo, qScale5lo, qSrc5lo 14330c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc5hi, qScale5hi, qSrc5hi 14340c1bc742181ded4930842b46e9507372f0b1b963James Dong 14350c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Row 3 & 5 (both rows were widened and scaled just above) 14360c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2 14370c1bc742181ded4930842b46e9507372f0b1b963James Dong VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2 14380c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB pSrc, pSrc, #16*2*2 ;// Step pSrc back 64 bytes - NOTE(review): pSrc is set by the caller; presumably this lands on the stored row 4, confirm 14390c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi7lo, qRes53lo ;// Output i7 14400c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi7hi, qRes53hi 14410c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3 14420c1bc742181ded4930842b46e9507372f0b1b963James Dong VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3 14430c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 qXj4, [pSrc @64] ;// Reload j4 from memory at a 64-bit aligned address (presumably because qSrc1hi was mapped onto the Src4 register - TODO confirm qXj4's mapping) 14440c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi4lo, qRes53lo ;// Output i4 14450c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi4hi, qRes53hi 14460c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc4lo, dXj4lo, #(12-1) ;// j4 reuses the registers that held scaled j3 14470c1bc742181ded4930842b46e9507372f0b1b963James Dong VSHLL qSrc4hi, dXj4hi, #(12-1) 14480c1bc742181ded4930842b46e9507372f0b1b963James Dong VLD1 {qScale4lo, qScale4hi}, [pScale] ;// Load row 4 scale; no writeback, so pScale stays at row 4 14490c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants 14500c1bc742181ded4930842b46e9507372f0b1b963James
 Dong VQRDMULH qSrc4lo, qScale4lo, qSrc4lo 14510c1bc742181ded4930842b46e9507372f0b1b963James Dong VQRDMULH qSrc4hi, qScale4hi, qSrc4hi 14520c1bc742181ded4930842b46e9507372f0b1b963James Dong VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants 14530c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// Row 4 14540c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi1lo, qSrc4lo ;// Output i1 14550c1bc742181ded4930842b46e9507372f0b1b963James Dong VMOVN dXi1hi, qSrc4hi ;// Scaled row 4 is narrowed straight to i1 with no butterfly 14560c1bc742181ded4930842b46e9507372f0b1b963James Dong 14570c1bc742181ded4930842b46e9507372f0b1b963James Dong MEND 14580c1bc742181ded4930842b46e9507372f0b1b963James Dong 14590c1bc742181ded4930842b46e9507372f0b1b963James Dong END 1460