10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This confidential and proprietary software may be used only as
30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// authorised by a licensing agreement from ARM Limited
40c1bc742181ded4930842b46e9507372f0b1b963James Dong;//   (C) COPYRIGHT 2004 ARM Limited
50c1bc742181ded4930842b46e9507372f0b1b963James Dong;//       ALL RIGHTS RESERVED
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The entire notice above must be reproduced on all authorised
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;// copies and copies may only be made to the extent permitted
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// by a licensing agreement from ARM Limited.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
100c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IDCT_s.s
110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
120c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Inverse DCT module
130c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
140c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
150c1bc742181ded4930842b46e9507372f0b1b963James Dong;// ALGORITHM DESCRIPTION
160c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// column and then a 1D IDCT for each row.
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The 8-point 1D IDCT is defined by
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;//   f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//   C(u) = 1/sqrt(2) if u=0 or 1 if u!=0
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//   c(u,x) = cos( (2x+1)*u*pi/16 )
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong;// We compute the 8-point 1D IDCT using the reverse of
270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the Arai-Agui-Nakajima flow graph which we split into
280c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 5 stages named in reverse order to identify with the
290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// forward DCT. Direct inversion of the forward formulae
300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// in file FDCT_s.s gives:
310c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
320c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 5:   j(u) = T(u)*A(u)  [ A(u)=4*C(u)*c(u,0) ]
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             [ A(0) = 2*sqrt(2)
340c1bc742181ded4930842b46e9507372f0b1b963James Dong;//               A(u) = 4*cos(u*pi/16)  for (u!=0) ]
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 4:   i0 = j0             i1 = j4
370c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             i3 = (j2+j6)/2      i2 = (j2-j6)/2
380c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             i7 = (j5+j3)/2      i4 = (j5-j3)/2
390c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             i5 = (j1+j7)/2      i6 = (j1-j7)/2
400c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3:   h0 = (i0+i1)/2      h1 = (i0-i1)/2
420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h2 = (i2*sqrt2)-i3  h3 = i3
430c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h4 =  cos(pi/8)*i4 + sin(pi/8)*i6
440c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h6 = -sin(pi/8)*i4 + cos(pi/8)*i6
450c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             [ The above two lines rotate by -(pi/8) ]
460c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h5 = (i5-i7)/sqrt2  h7 = (i5+i7)/2
470c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2:   g0 = (h0+h3)/2      g3 = (h0-h3)/2
490c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             g1 = (h1+h2)/2      g2 = (h1-h2)/2
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             g7 = h7             g6 = h6 - h7
510c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             g5 = h5 - g6        g4 = h4 - g5
520c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
530c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1:   f0 = (g0+g7)/2      f7 = (g0-g7)/2
540c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             f1 = (g1+g6)/2      f6 = (g1-g6)/2
550c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             f2 = (g2+g5)/2      f5 = (g2-g5)/2
560c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             f3 = (g3+g4)/2      f4 = (g3-g4)/2
570c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note that most coefficients are halved 3 times during the
590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// above calculation. We can rescale the algorithm dividing
600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the input by 8 to remove the halvings.
610c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 5:   j(u) = T(u)*A(u)/8
630c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 4:   i0 = j0             i1 = j4
650c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             i3 = j2 + j6        i2 = j2 - j6
660c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             i7 = j5 + j3        i4 = j5 - j3
670c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             i5 = j1 + j7        i6 = j1 - j7
680c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
690c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3:   h0 = i0 + i1        h1 = i0 - i1
700c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h2 = (i2*sqrt2)-i3  h3 = i3
710c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6)
720c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6)
730c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             h5 = (i5-i7)*sqrt2  h7 = i5 + i7
740c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2:   g0 = h0 + h3        g3 = h0 - h3
760c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             g1 = h1 + h2        g2 = h1 - h2
770c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             g7 = h7             g6 = h6 - h7
780c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             g5 = h5 - g6        g4 = h4 - g5
790c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1:   f0 = g0 + g7        f7 = g0 - g7
810c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             f1 = g1 + g6        f6 = g1 - g6
820c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             f2 = g2 + g5        f5 = g2 - g5
830c1bc742181ded4930842b46e9507372f0b1b963James Dong;//             f3 = g3 + g4        f4 = g3 - g4
840c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
850c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Note:
860c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 1. The scaling by A(u)/8 can often be combined with inverse
870c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    quantization. The column and row scalings can be combined.
880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2. The flowgraph in the AAN paper has h4,g6 negated compared
890c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    to the above code but is otherwise identical.
900c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 3. The rotation by -pi/8 can be performed using three multiplies
910c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    Eg  c*i4+s*i6 = (i6-i4)*s + (c+s)*i4
920c1bc742181ded4930842b46e9507372f0b1b963James Dong;//       -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6
930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 4. If |T(u)|<=1 then from the IDCT definition,
940c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2
950c1bc742181ded4930842b46e9507372f0b1b963James Dong;//            = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2
960c1bc742181ded4930842b46e9507372f0b1b963James Dong;//            = ((1/sqrt2) + (cot(pi/32)-1)/2)/2
970c1bc742181ded4930842b46e9507372f0b1b963James Dong;//            = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2)
980c1bc742181ded4930842b46e9507372f0b1b963James Dong;//            = (approx)2.64
990c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    So the max gain of the 2D IDCT is ~x7.0 = 3 bits.
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    The table below shows input patterns generating the maximum
1010c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong;//    InputPattern      Max |f(x)|
1030c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PPPPPPPP        |f0| =  2.64
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PPPMMMMM        |f1| =  2.64
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PPMMMPPP        |f2| =  2.64
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PPMMPPMM        |f3| =  2.64
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PMMPPMMP        |f4| =  2.64
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PMMPMMPM        |f5| =  2.64
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PMPPMPMP        |f6| =  2.64
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//      PMPMPMPM        |f7| =  2.64
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//   Note that this input pattern is the transpose of the
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong;//   corresponding max input pattern for the FDCT.
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Arguments
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong
1160c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc    RN 0    ;// r0: pointer to the source (input) data buffer
1170c1bc742181ded4930842b46e9507372f0b1b963James DongStride  RN 1    ;// r1: destination stride in bytes
1180c1bc742181ded4930842b46e9507372f0b1b963James DongpDest   RN 2    ;// r2: pointer to the destination (output) data buffer
1190c1bc742181ded4930842b46e9507372f0b1b963James DongpScale  RN 3    ;// r3: pointer to the AAN scaling table
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// DCT Inverse Macro
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// The DCT code should be parametrized according
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// to the following inputs:
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $outsize = "u8"  :  8-bit unsigned data saturated (0 to +255)
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//            "s9"  : 16-bit signed data saturated to 9-bit (-256 to +255)
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//            "s16" : 16-bit signed data not saturated (max size ~+/-14273)
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//            "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Inputs:
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pSrc   = r0 = Pointer to input data
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//               Range is -256 to +255 (9-bit)
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Stride = r1 = Destination stride in bytes
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pDest  = r2 = Pointer to output data
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        MACRO
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT  $outsize, $inscale, $stride
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        LCLA    SHIFT
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ARM1136JS
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong;// REGISTER ALLOCATION
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This is hard since we have 8 values, 9 free registers and each
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong;// butterfly requires a temporary register. We also want to
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// maintain register order so we can use LDM/STM. The table below
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong;// summarises the register allocation that meets all these criteria.
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above.
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r1  a01     g0  h0
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r4  b01 f0  g1  h1  i0
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r5  a23 f1  g2      i1
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r6  b23 f2  g3  h2  i2
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r7  a45 f3      h3  i3
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r8  b45 f4  g4  h4  i4
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r9  a67 f5  g5  h5  i5
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r10 b67 f6  g6  h6  i6
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r11     f7  g7  h7  i7
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
1640c1bc742181ded4930842b46e9507372f0b1b963James Dongra01    RN 1
1650c1bc742181ded4930842b46e9507372f0b1b963James Dongrb01    RN 4
1660c1bc742181ded4930842b46e9507372f0b1b963James Dongra23    RN 5
1670c1bc742181ded4930842b46e9507372f0b1b963James Dongrb23    RN 6
1680c1bc742181ded4930842b46e9507372f0b1b963James Dongra45    RN 7
1690c1bc742181ded4930842b46e9507372f0b1b963James Dongrb45    RN 8
1700c1bc742181ded4930842b46e9507372f0b1b963James Dongra67    RN 9
1710c1bc742181ded4930842b46e9507372f0b1b963James Dongrb67    RN 10
1720c1bc742181ded4930842b46e9507372f0b1b963James Dongrtmp    RN 11
1730c1bc742181ded4930842b46e9507372f0b1b963James DongcsPiBy8 RN 12   ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ]
1740c1bc742181ded4930842b46e9507372f0b1b963James DongLoopRR2 RN 14   ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ]
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Transpose allocation
1760c1bc742181ded4930842b46e9507372f0b1b963James Dongxft     RN ra01
1770c1bc742181ded4930842b46e9507372f0b1b963James Dongxf0     RN rb01
1780c1bc742181ded4930842b46e9507372f0b1b963James Dongxf1     RN ra23
1790c1bc742181ded4930842b46e9507372f0b1b963James Dongxf2     RN rb23
1800c1bc742181ded4930842b46e9507372f0b1b963James Dongxf3     RN ra45
1810c1bc742181ded4930842b46e9507372f0b1b963James Dongxf4     RN rb45
1820c1bc742181ded4930842b46e9507372f0b1b963James Dongxf5     RN ra67
1830c1bc742181ded4930842b46e9507372f0b1b963James Dongxf6     RN rb67
1840c1bc742181ded4930842b46e9507372f0b1b963James Dongxf7     RN rtmp
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 1 allocation
1860c1bc742181ded4930842b46e9507372f0b1b963James Dongxg0     RN xft
1870c1bc742181ded4930842b46e9507372f0b1b963James Dongxg1     RN xf0
1880c1bc742181ded4930842b46e9507372f0b1b963James Dongxg2     RN xf1
1890c1bc742181ded4930842b46e9507372f0b1b963James Dongxg3     RN xf2
1900c1bc742181ded4930842b46e9507372f0b1b963James Dongxgt     RN xf3
1910c1bc742181ded4930842b46e9507372f0b1b963James Dongxg4     RN xf4
1920c1bc742181ded4930842b46e9507372f0b1b963James Dongxg5     RN xf5
1930c1bc742181ded4930842b46e9507372f0b1b963James Dongxg6     RN xf6
1940c1bc742181ded4930842b46e9507372f0b1b963James Dongxg7     RN xf7
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 2 allocation
1960c1bc742181ded4930842b46e9507372f0b1b963James Dongxh0     RN xg0
1970c1bc742181ded4930842b46e9507372f0b1b963James Dongxh1     RN xg1
1980c1bc742181ded4930842b46e9507372f0b1b963James Dongxht     RN xg2
1990c1bc742181ded4930842b46e9507372f0b1b963James Dongxh2     RN xg3
2000c1bc742181ded4930842b46e9507372f0b1b963James Dongxh3     RN xgt
2010c1bc742181ded4930842b46e9507372f0b1b963James Dongxh4     RN xg4
2020c1bc742181ded4930842b46e9507372f0b1b963James Dongxh5     RN xg5
2030c1bc742181ded4930842b46e9507372f0b1b963James Dongxh6     RN xg6
2040c1bc742181ded4930842b46e9507372f0b1b963James Dongxh7     RN xg7
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong;// IStage 3,4 allocation
2060c1bc742181ded4930842b46e9507372f0b1b963James Dongxit     RN xh0
2070c1bc742181ded4930842b46e9507372f0b1b963James Dongxi0     RN xh1
2080c1bc742181ded4930842b46e9507372f0b1b963James Dongxi1     RN xht
2090c1bc742181ded4930842b46e9507372f0b1b963James Dongxi2     RN xh2
2100c1bc742181ded4930842b46e9507372f0b1b963James Dongxi3     RN xh3
2110c1bc742181ded4930842b46e9507372f0b1b963James Dongxi4     RN xh4
2120c1bc742181ded4930842b46e9507372f0b1b963James Dongxi5     RN xh5
2130c1bc742181ded4930842b46e9507372f0b1b963James Dongxi6     RN xh6
2140c1bc742181ded4930842b46e9507372f0b1b963James Dongxi7     RN xh7
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR   pDest,  ppDest
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$stride"="s"
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong            M_STR   Stride, pStride
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ADR   pDest,  pBlk
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     csPiBy8, =0x30fc7642
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     LoopRR2, =0x00005a82
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong
2240c1bc742181ded4930842b46e9507372f0b1b963James Dongv6_idct_col$_F
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load even values
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi4, [pSrc], #4  ;// j0
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi5, [pSrc, #4*16-4]  ;// j4
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi6, [pSrc, #2*16-4]  ;// j2
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi7, [pSrc, #6*16-4]  ;// j6
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Scale Even Values
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$inscale"="s16" ;// 16x16 mul
2330c1bc742181ded4930842b46e9507372f0b1b963James DongSHIFT       SETA    12
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi0, [pScale], #4
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi1, [pScale, #4*16-4]
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi2, [pScale, #2*16-4]
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xit, #1<<(SHIFT-1)
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi3, xi0, xi4, xit
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi4, xi0, xi4, xit
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi0, xi1, xi5, xit
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi5, xi1, xi5, xit
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi3, xi3, ASR #SHIFT
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi4, xi3, xi4, LSL #(16-SHIFT)
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi3, [pScale, #6*16-4]
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi1, xi2, xi6, xit
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi6, xi2, xi6, xit
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi0, xi0, ASR #SHIFT
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi5, xi0, xi5, LSL #(16-SHIFT)
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi2, xi3, xi7, xit
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi7, xi3, xi7, xit
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi1, xi1, ASR #SHIFT
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi6, xi1, xi6, LSL #(16-SHIFT)
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi2, xi2, ASR #SHIFT
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi7, xi2, xi7, LSL #(16-SHIFT)
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$inscale"="s32" ;// 32x16 mul
2570c1bc742181ded4930842b46e9507372f0b1b963James DongSHIFT       SETA    (12+8-16)
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xit, #1<<(SHIFT-1)
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi0, [pScale], #8
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi1, [pScale, #0*32+4-8]
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi2, [pScale, #4*32-8]
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi3, [pScale, #4*32+4-8]
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi0, xi0, xi4, xit
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi1, xi1, xi4, xit
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi2, xi2, xi5, xit
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi3, xi3, xi5, xit
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi0, xi0, ASR #SHIFT
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi4, xi0, xi1, LSL #(16-SHIFT)
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi2, xi2, ASR #SHIFT
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi5, xi2, xi3, LSL #(16-SHIFT)
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi0, [pScale, #2*32-8]
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi1, [pScale, #2*32+4-8]
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi2, [pScale, #6*32-8]
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi3, [pScale, #6*32+4-8]
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi0, xi0, xi6, xit
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi1, xi1, xi6, xit
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi2, xi2, xi7, xit
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi3, xi3, xi7, xit
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi0, xi0, ASR #SHIFT
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi6, xi0, xi1, LSL #(16-SHIFT)
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi2, xi2, ASR #SHIFT
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi7, xi2, xi3, LSL #(16-SHIFT)
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load odd values
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi0, [pSrc, #1*16-4]      ;// j1
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi1, [pSrc, #7*16-4]      ;// j7
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi2, [pSrc, #5*16-4]      ;// j5
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi3, [pSrc, #3*16-4]      ;// j3
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF  {TRUE}
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// shortcut if odd values 0
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong            TEQ     xi0, #0
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong            TEQEQ   xi1, #0
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong            TEQEQ   xi2, #0
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong            TEQEQ   xi3, #0
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong            BEQ     v6OddZero$_F
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Store scaled even values
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        STMIA   pDest, {xi4, xi5, xi6, xi7}
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Scale odd values
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$inscale"="s16"
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Perform AAN Scale
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi4, [pScale, #1*16-4]
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi5, [pScale, #7*16-4]
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi6, [pScale, #5*16-4]
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi7, xi0, xi4, xit
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi0, xi0, xi4, xit
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi4, xi1, xi5, xit
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi1, xi1, xi5, xit
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi7, xi7, ASR #SHIFT
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi0, xi7, xi0, LSL #(16-SHIFT)
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi7, [pScale, #3*16-4]
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi5, xi2, xi6, xit
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi2, xi2, xi6, xit
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi4, xi4, ASR #SHIFT
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi1, xi4, xi1, LSL #(16-SHIFT)
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLABB  xi6, xi3, xi7, xit
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLATT  xi3, xi3, xi7, xit
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi5, xi5, ASR #SHIFT
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi2, xi5, xi2, LSL #(16-SHIFT)
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi6, xi6, ASR #SHIFT
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi3, xi6, xi3, LSL #(16-SHIFT)
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$inscale"="s32" ;// 32x16 mul
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi4, [pScale, #1*32-8]
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi5, [pScale, #1*32+4-8]
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi6, [pScale, #7*32-8]
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi7, [pScale, #7*32+4-8]
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi4, xi4, xi0, xit
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi5, xi5, xi0, xit
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi6, xi6, xi1, xit
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi7, xi7, xi1, xit
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi4, xi4, ASR #SHIFT
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi0, xi4, xi5, LSL #(16-SHIFT)
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi6, xi6, ASR #SHIFT
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi1, xi6, xi7, LSL #(16-SHIFT)
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi4, [pScale, #5*32-8]
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi5, [pScale, #5*32+4-8]
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi6, [pScale, #3*32-8]
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR     xi7, [pScale, #3*32+4-8]
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi4, xi4, xi2, xit
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi5, xi5, xi2, xit
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWB  xi6, xi6, xi3, xit
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong            SMLAWT  xi7, xi7, xi3, xit
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi4, xi4, ASR #SHIFT
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi2, xi4, xi5, LSL #(16-SHIFT)
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong            MOV     xi6, xi6, ASR #SHIFT
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   xi3, xi6, xi7, LSL #(16-SHIFT)
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xit, =0x00010001        ;// rounding constant
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16 xi5, xi0, xi1           ;// (j1+j7)/2
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi5, xi5, xit
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi6, xi0, xi1           ;// j1-j7
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16 xi7, xi2, xi3           ;// (j5+j3)/2
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi7, xi7, xit
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi4, xi2, xi3           ;// j5-j3
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi3, xi5, xi7           ;// (i5-i7)/2
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   xi0, xi6, xi4, LSL#16   ;// [i4,i6] row a
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xi1, xi4, xi6, ASR#16   ;// [i4,i6] row b
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX  xi2, xi0, csPiBy8       ;// rowa by [c,s]
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX  xi4, xi1, csPiBy8       ;// rowb by [c,s]
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUSD   xi0, xi0, csPiBy8       ;// rowa by [-s,c]
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUSD   xi6, xi1, csPiBy8       ;// rowb by [-s,c]
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULBB  xi1, xi3, LoopRR2
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULTB  xi3, xi3, LoopRR2
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh4, xi4, xi2, ASR#16   ;// h4/4
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh6, xi6, xi0, ASR#16   ;// h6/4
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xh7, xi5, xi7           ;// (i5+i7)/4
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// xi0,xi1,xi2,xi3 now free
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3, rows 2to3 x1/2
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     xi3, xi3, LSL #1
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh5, xi3, xi1, ASR#15   ;// h5/4
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRD    xi0, [pDest, #8]        ;// j2,j6 scaled
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2, rows4to7
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xg6, xh6, xh7
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xg5, xh5, xg6
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xg4, xh4, xg5
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi2, xi0, xi1           ;// (j2-j6)
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi3, xi0, xi1           ;// (j2+j6)/2
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULBB  xi0, xi2, LoopRR2
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULTB  xi2, xi2, LoopRR2
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     xi2, xi2, LSL #1
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// xi0, xi1 now free
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3 rows 0to1 x 1/2
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRD    xi0, [pDest]            ;// j0, j4 scaled
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xh2, xh2, xi3
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xh0, xi0, xi1
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xh1, xi0, xi1
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2 rows 0to3 x 1/2
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xg2, xh1, xh2
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xg1, xh1, xh2
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xg3, xh0, xh3
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xg0, xh0, xh3
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 1 all rows
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16  xf3, xg3, xg4
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xf4, xg3, xg4
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16  xf2, xg2, xg5
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xf5, xg2, xg5
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16  xf1, xg1, xg6
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xf6, xg1, xg6
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16  xf0, xg0, xg7
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xf7, xg0, xg7
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Transpose, store and loop
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra01, xf0, xf1, LSL #16
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb01, xf1, xf0, ASR #16
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra23, xf2, xf3, LSL #16
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb23, xf3, xf2, ASR #16
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra45, xf4, xf5, LSL #16
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb45, xf5, xf4, ASR #16
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra67, xf6, xf7, LSL #16
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong        STMIA   pDest!, {ra01, ra23, ra45, ra67}
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb67, xf7, xf6, ASR #16
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        STMIA   pDest!, {rb01, rb23, rb45, rb67}
4420c1bc742181ded4930842b46e9507372f0b1b963James Dong        BCC     v6_idct_col$_F
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pDest, #(64*2)
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   pDest, ppDest
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$stride"="s"
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong            M_LDR   pScale, pStride
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
4490c1bc742181ded4930842b46e9507372f0b1b963James Dong        B       v6_idct_row$_F
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// v6OddZero$_F
;//
;// Column-pass iteration for the "odd terms zero" case named by the label
;// (the test that branches here is not part of this section).  Only the
;// even-term butterflies are evaluated, from values already held in
;// xi4/xi5 (halved sum and difference -> xh0/xh1) and xi6/xi7 (difference
;// scaled by the bottom halfword of LoopRR2, minus the halved sum -> xh2).
;// Stage 1 then reduces to register copies, because the general path's
;// SADD16/SSUB16 against xg4..xg7 would add and subtract zero.
;//
;// Every data register carries two packed 16-bit lanes.
;//
;// NOTE(review): xh3 is read below but never written under that name in
;// this section; it presumably shares a register with xi3 (the register
;// aliases are declared elsewhere in the file) - confirm.
;//
;// Loop control: ADDS adds 2<<29 to LoopRR2 and the following BCC repeats
;// the column loop until that addition carries out of bit 31.  On exit,
;// pSrc is pointed 128 bytes behind pDest (the data stored by the STMIA
;// pairs, 32 bytes per iteration), pDest is reloaded through M_LDR, and
;// execution falls through to v6_idct_row$_F.
4510c1bc742181ded4930842b46e9507372f0b1b963James Dongv6OddZero$_F
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi2, xi6, xi7           ;// (j2-j6)
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi3, xi6, xi7           ;// (j2+j6)/2
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULBB  xi0, xi2, LoopRR2       ;// low lane  of (j2-j6) * LoopRR2[15:0]
4560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULTB  xi2, xi2, LoopRR2       ;// high lane of (j2-j6) * LoopRR2[15:0]
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong
4580c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     xi2, xi2, LSL #1        ;// so bits [31:16] hold (product >> 15)
4590c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4 : repack both lanes, each product >> 15
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xh2, xh2, xi3           ;// subtract the halved sum (j2+j6)/2
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// xi0, xi1 now free
4630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3 rows 0to1 x 1/2
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xh0, xi4, xi5           ;// (xi4+xi5)/2 per lane
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xh1, xi4, xi5           ;// (xi4-xi5)/2 per lane
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2 rows 0to3 x 1/2
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xg2, xh1, xh2
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xg1, xh1, xh2
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xg3, xh0, xh3           ;// xh3: see NOTE(review) in the header
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xg0, xh0, xh3
4730c1bc742181ded4930842b46e9507372f0b1b963James Dong
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 1 all rows
        ;// With xg4..xg7 treated as zero, each sum/difference pair collapses
        ;// to two copies of the same even-stage value.
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf3, xg3
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf4, xg3
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf2, xg2
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf5, xg2
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf1, xg1
4800c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf6, xg1
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf0, xg0
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV  xf7, xg0
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Transpose
        ;// PKHBT gathers the low lanes of a register pair into raNN,
        ;// PKHTB gathers the high lanes of the same pair into rbNN.
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra01, xf0, xf1, LSL #16
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb01, xf1, xf0, ASR #16
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra23, xf2, xf3, LSL #16
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb23, xf3, xf2, ASR #16
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra45, xf4, xf5, LSL #16
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb45, xf5, xf4, ASR #16
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong
4940c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ra67, xf6, xf7, LSL #16
4950c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   rb67, xf7, xf6, ASR #16
4960c1bc742181ded4930842b46e9507372f0b1b963James Dong
4970c1bc742181ded4930842b46e9507372f0b1b963James Dong        STMIA   pDest!, {ra01, ra23, ra45, ra67}    ;// low-lane results, pDest += 16
4980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
4990c1bc742181ded4930842b46e9507372f0b1b963James Dong        STMIA   pDest!, {rb01, rb23, rb45, rb67}    ;// high-lane results, pDest += 16 (flags untouched)
5000c1bc742181ded4930842b46e9507372f0b1b963James Dong
5010c1bc742181ded4930842b46e9507372f0b1b963James Dong        BCC     v6_idct_col$_F          ;// no carry from the ADDS yet: next column pair
5020c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pDest, #(64*2)    ;// row pass reads from 128 bytes behind pDest
5030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   pDest, ppDest           ;// reload pDest from ppDest (M_LDR is defined elsewhere)
5040c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$stride"="s"
5050c1bc742181ded4930842b46e9507372f0b1b963James Dong            M_LDR   pScale, pStride     ;// pScale is the per-row step used by the row pass
5060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
5070c1bc742181ded4930842b46e9507372f0b1b963James Dong
5080c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// v6_idct_row$_F
;//
;// Second 1-D pass of the ARMv6 IDCT.  Each iteration loads one 32-bit
;// word (two packed 16-bit lanes) from each of eight locations spaced
;// 16 bytes apart starting at pSrc, runs the staged butterflies
;// (IStage 4,3 -> 2 -> 1) on both lanes at once, optionally saturates
;// according to $outsize, and stores two output rows at pDest.
;//
;//   pSrc     post-incremented by 4 bytes per iteration (the j0 load)
;//   pDest    advanced by pScale ("s" stride) or by the literal $stride
;//            after each of the two rows written
;//   LoopRR2  bottom halfword is the multiplier for SMULBB/SMULTB; the
;//            top bits are the loop counter (ADDS #2<<29)
;//   csPiBy8  packed pair used by SMUADX/SMUSD for the [c,s] rotation
;//
;// Per the load comments, the j7 and j6 inputs arrive pre-scaled by 4 and
;// by 2; they are brought back with rounding halving adds against xit.
;//
;// Output formats:
;//   u8   lanes clamped to 0..255 (USAT16 #8), packed to bytes,
;//        8 bytes stored per row
;//   s9   lanes clamped to 9-bit signed (SSAT16 #9), 16 bytes per row
;//   s16  no saturation, 16 bytes per row
;//
;// Loop control: the ADDS on LoopRR2 sets carry when the counter wraps.
;// No later instruction in the iteration writes the C flag, so the
;// closing BCC still sees it.
;//
;// NOTE(review): xh3 is read but never written under that name here; it
;// presumably shares a register with xi3 (aliases are declared elsewhere
;// in the file) - confirm.
5090c1bc742181ded4930842b46e9507372f0b1b963James Dongv6_idct_row$_F
5100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3, rows4to7 x1/4
5110c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xit, =0x00010001        ;// rounding constant
5120c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi0, [pSrc, #1*16]      ;// j1
5130c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi1, [pSrc, #7*16]      ;// 4*j7
5140c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi2, [pSrc, #5*16]      ;// j5
5150c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi3, [pSrc, #3*16]      ;// j3
5160c1bc742181ded4930842b46e9507372f0b1b963James Dong
5170c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi1, xi1, xit           ;// 2*j7
5180c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi1, xi1, xit           ;// j7
5190c1bc742181ded4930842b46e9507372f0b1b963James Dong
5200c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi5, xi0, xi1           ;// (j1+j7)/2
5210c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi6, xi0, xi1           ;// j1-j7
5220c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi7, xi2, xi3           ;// (j5+j3)/2
5230c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi4, xi2, xi3           ;// j5-j3
5240c1bc742181ded4930842b46e9507372f0b1b963James Dong
5250c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi3, xi5, xi7           ;// (i5-i7)/2
5260c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Regroup so each register holds the (i4,i6) pair of one lane,
        ;// ready for the dual 16x16 multiplies against csPiBy8.
5270c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   xi0, xi6, xi4, LSL#16   ;// [i4,i6] row a
5280c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xi1, xi4, xi6, ASR#16   ;// [i4,i6] row b
5290c1bc742181ded4930842b46e9507372f0b1b963James Dong
5300c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX  xi2, xi0, csPiBy8       ;// rowa by [c,s]
5310c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX  xi4, xi1, csPiBy8       ;// rowb by [c,s]
5320c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUSD   xi0, xi0, csPiBy8       ;// rowa by [-s,c]
5330c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUSD   xi6, xi1, csPiBy8       ;// rowb by [-s,c]
5340c1bc742181ded4930842b46e9507372f0b1b963James Dong
5350c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULBB  xi1, xi3, LoopRR2       ;// low lane  of (i5-i7)/2 * LoopRR2[15:0]
5360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULTB  xi3, xi3, LoopRR2       ;// high lane of (i5-i7)/2 * LoopRR2[15:0]
5370c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Repack the 32-bit rotation results: top halfword of the row-b
        ;// product, and the row-a product shifted down by 16.
5380c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh4, xi4, xi2, ASR#16   ;// h4/4
5390c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh6, xi6, xi0, ASR#16   ;// h6/4
5400c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xh7, xi5, xi7           ;// (i5+i7)/4
5410c1bc742181ded4930842b46e9507372f0b1b963James Dong
5420c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     xi3, xi3, LSL #1        ;// so bits [31:16] hold (product >> 15)
5430c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh5, xi3, xi1, ASR#15   ;// h5/4
5440c1bc742181ded4930842b46e9507372f0b1b963James Dong
5450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// xi0,xi1,xi2,xi3 now free
5460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3, rows 2to3 x1/2
5470c1bc742181ded4930842b46e9507372f0b1b963James Dong
5480c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi0, [pSrc, #2*16]      ;// j2
5490c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi1, [pSrc, #6*16]      ;// 2*j6
5500c1bc742181ded4930842b46e9507372f0b1b963James Dong
5510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2, rows4to7
5520c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xg6, xh6, xh7
5530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xg5, xh5, xg6
5540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xg4, xh4, xg5
5550c1bc742181ded4930842b46e9507372f0b1b963James Dong
5560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi1, xi1, xit           ;// j6
5570c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xi2, xi0, xi1           ;// (j2-j6)
5580c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xi3, xi0, xi1           ;// (j2+j6)/2
5590c1bc742181ded4930842b46e9507372f0b1b963James Dong
5600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULBB  xi0, xi2, LoopRR2       ;// low lane  of (j2-j6) * LoopRR2[15:0]
5610c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMULTB  xi2, xi2, LoopRR2       ;// high lane of (j2-j6) * LoopRR2[15:0]
5620c1bc742181ded4930842b46e9507372f0b1b963James Dong
5630c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     xi2, xi2, LSL #1        ;// so bits [31:16] hold (product >> 15)
5640c1bc742181ded4930842b46e9507372f0b1b963James Dong
5650c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
5660c1bc742181ded4930842b46e9507372f0b1b963James Dong
5670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// xi0, xi1 now free
5680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3 rows 0to1 x 1/2
5690c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi1, [pSrc, #4*16]      ;// j4
5700c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     xi0, [pSrc], #4         ;// j0
5710c1bc742181ded4930842b46e9507372f0b1b963James Dong
5720c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB16  xh2, xh2, xi3
5730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
5740c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// DC rounding bias: add 4 to each 16-bit lane of j0.
        ;// The add is lane-wise (SADD16).  A plain 32-bit ADD of
        ;// 0x00040004 lets a carry out of the low lane (low halfword in
        ;// 0xFFFC..0xFFFF, i.e. -4..-1) spill into the high lane, which
        ;// would then be biased by 5 instead of 4.
        ;// xit is not read again in this iteration and is reloaded at the
        ;// loop top, so it is rescaled in place.  Neither instruction
        ;// writes the C flag set by the ADDS above.
        MOV     xit, xit, LSL #2        ;// 0x00010001 -> 0x00040004
5750c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16  xi0, xi0, xit           ;// ensure correct round
5760c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xh0, xi0, xi1           ;// of DC result
5770c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xh1, xi0, xi1
5780c1bc742181ded4930842b46e9507372f0b1b963James Dong
5790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2 rows 0to3 x 1/2
5800c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xg2, xh1, xh2
5810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xg1, xh1, xh2
5820c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xg3, xh0, xh3           ;// xh3: see NOTE(review) in the header
5830c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xg0, xh0, xh3
5840c1bc742181ded4930842b46e9507372f0b1b963James Dong
5850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 1 all rows
        ;// Halving add/sub: this last butterfly also drops one more bit.
5860c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xf3, xg3, xg4
5870c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xf4, xg3, xg4
5880c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xf2, xg2, xg5
5890c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xf5, xg2, xg5
5900c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xf1, xg1, xg6
5910c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xf6, xg1, xg6
5920c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD16 xf0, xg0, xg7
5930c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB16 xf7, xg0, xg7
5940c1bc742181ded4930842b46e9507372f0b1b963James Dong
5950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Saturate
        ;// u8: clamp both lanes to 0..255.  s9: clamp to 9-bit signed.
        ;// s16 matches neither IF and is left unsaturated.
5960c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ("$outsize"="u8")
5970c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf0, #8, xf0
5980c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf1, #8, xf1
5990c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf2, #8, xf2
6000c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf3, #8, xf3
6010c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf4, #8, xf4
6020c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf5, #8, xf5
6030c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf6, #8, xf6
6040c1bc742181ded4930842b46e9507372f0b1b963James Dong            USAT16  xf7, #8, xf7
6050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
6060c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ("$outsize"="s9")
6070c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf0, #9, xf0
6080c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf1, #9, xf1
6090c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf2, #9, xf2
6100c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf3, #9, xf3
6110c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf4, #9, xf4
6120c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf5, #9, xf5
6130c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf6, #9, xf6
6140c1bc742181ded4930842b46e9507372f0b1b963James Dong            SSAT16  xf7, #9, xf7
6150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
6160c1bc742181ded4930842b46e9507372f0b1b963James Dong
6170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Transpose to Row, Pack and store
6180c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ("$outsize"="u8")
            ;// After USAT16 each lane fits in a byte, so a neighbouring
            ;// register can be ORed in 8 bits higher without overlap.
6190c1bc742181ded4930842b46e9507372f0b1b963James Dong            ORR     xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ]
6200c1bc742181ded4930842b46e9507372f0b1b963James Dong            ORR     xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ]
6210c1bc742181ded4930842b46e9507372f0b1b963James Dong            ORR     xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ]
6220c1bc742181ded4930842b46e9507372f0b1b963James Dong            ORR     xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ]
6230c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   ra01, xf0, xf2, LSL #16     ;// row a bytes 0..3
6240c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHTB   rb01, xf2, xf0, ASR #16     ;// row b bytes 0..3
6250c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   ra23, xf4, xf6, LSL #16     ;// row a bytes 4..7
6260c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHTB   rb23, xf6, xf4, ASR #16     ;// row b bytes 4..7
6270c1bc742181ded4930842b46e9507372f0b1b963James Dong            STMIA   pDest, {ra01, ra23}
6280c1bc742181ded4930842b46e9507372f0b1b963James Dong            IF "$stride"="s"
6290c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, pScale
6300c1bc742181ded4930842b46e9507372f0b1b963James Dong                STMIA   pDest, {rb01, rb23}
6310c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, pScale
6320c1bc742181ded4930842b46e9507372f0b1b963James Dong            ELSE
6330c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, #($stride)
6340c1bc742181ded4930842b46e9507372f0b1b963James Dong                STMIA   pDest, {rb01, rb23}
6350c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, #($stride)
6360c1bc742181ded4930842b46e9507372f0b1b963James Dong            ENDIF
6370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
6380c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ("$outsize"="s9"):LOR:("$outsize"="s16")
            ;// 16-bit outputs: raNN takes the low lanes of a register pair,
            ;// rbNN takes the high lanes.
6390c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   ra01, xf0, xf1, LSL #16
6400c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHTB   rb01, xf1, xf0, ASR #16
6410c1bc742181ded4930842b46e9507372f0b1b963James Dong
6420c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   ra23, xf2, xf3, LSL #16
6430c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHTB   rb23, xf3, xf2, ASR #16
6440c1bc742181ded4930842b46e9507372f0b1b963James Dong
6450c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   ra45, xf4, xf5, LSL #16
6460c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHTB   rb45, xf5, xf4, ASR #16
6470c1bc742181ded4930842b46e9507372f0b1b963James Dong
6480c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHBT   ra67, xf6, xf7, LSL #16
6490c1bc742181ded4930842b46e9507372f0b1b963James Dong            PKHTB   rb67, xf7, xf6, ASR #16
6500c1bc742181ded4930842b46e9507372f0b1b963James Dong
6510c1bc742181ded4930842b46e9507372f0b1b963James Dong            STMIA   pDest, {ra01, ra23, ra45, ra67}
6520c1bc742181ded4930842b46e9507372f0b1b963James Dong            IF "$stride"="s"
6530c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, pScale
6540c1bc742181ded4930842b46e9507372f0b1b963James Dong                STMIA   pDest, {rb01, rb23, rb45, rb67}
6550c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, pScale
6560c1bc742181ded4930842b46e9507372f0b1b963James Dong            ELSE
6570c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, #($stride)
6580c1bc742181ded4930842b46e9507372f0b1b963James Dong                STMIA   pDest, {rb01, rb23, rb45, rb67}
6590c1bc742181ded4930842b46e9507372f0b1b963James Dong                ADD     pDest, pDest, #($stride)
6600c1bc742181ded4930842b46e9507372f0b1b963James Dong            ENDIF
6610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
6620c1bc742181ded4930842b46e9507372f0b1b963James Dong
6630c1bc742181ded4930842b46e9507372f0b1b963James Dong        BCC     v6_idct_row$_F          ;// carry from the ADDS above still clear: next two rows
6640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// ARM1136JS
6650c1bc742181ded4930842b46e9507372f0b1b963James Dong
6660c1bc742181ded4930842b46e9507372f0b1b963James Dong
6670c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF CortexA8
6680c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// NEON register allocation for the CortexA8 path: input ("j") stage.
;//
;// Src0..Src7 and Tmp are plain numbers (7..15).  They are used below as
;// Q-register numbers (qXj*), and doubled to give the D-register numbers
;// of the low and high halves of the same Q register (dXj*lo / dXj*hi),
;// since Qn overlays D(2n) and D(2n+1).  All aliases are typed as vectors
;// of signed 16-bit elements (.S16).
;//
;// NOTE(review): Q7 overlays D14/D15, which fall in the D8-D15 range that
;// the ARM procedure call standard treats as callee-saved.  Confirm that
;// the function entry/exit code (not visible here) preserves them.
6690c1bc742181ded4930842b46e9507372f0b1b963James DongSrc0            EQU  7
6700c1bc742181ded4930842b46e9507372f0b1b963James DongSrc1            EQU  8
6710c1bc742181ded4930842b46e9507372f0b1b963James DongSrc2            EQU  9
6720c1bc742181ded4930842b46e9507372f0b1b963James DongSrc3            EQU  10
6730c1bc742181ded4930842b46e9507372f0b1b963James DongSrc4            EQU  11
6740c1bc742181ded4930842b46e9507372f0b1b963James DongSrc5            EQU  12
6750c1bc742181ded4930842b46e9507372f0b1b963James DongSrc6            EQU  13
6760c1bc742181ded4930842b46e9507372f0b1b963James DongSrc7            EQU  14
6770c1bc742181ded4930842b46e9507372f0b1b963James DongTmp             EQU  15              ;// ninth register, beyond the eight inputs
6780c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Quadword views of the j-stage values
6790c1bc742181ded4930842b46e9507372f0b1b963James DongqXj0            QN Src0.S16         ;// Q7
6800c1bc742181ded4930842b46e9507372f0b1b963James DongqXj1            QN Src1.S16         ;// Q8
6810c1bc742181ded4930842b46e9507372f0b1b963James DongqXj2            QN Src2.S16         ;// Q9
6820c1bc742181ded4930842b46e9507372f0b1b963James DongqXj3            QN Src3.S16         ;// Q10
6830c1bc742181ded4930842b46e9507372f0b1b963James DongqXj4            QN Src4.S16         ;// Q11
6840c1bc742181ded4930842b46e9507372f0b1b963James DongqXj5            QN Src5.S16         ;// Q12
6850c1bc742181ded4930842b46e9507372f0b1b963James DongqXj6            QN Src6.S16         ;// Q13
6860c1bc742181ded4930842b46e9507372f0b1b963James DongqXj7            QN Src7.S16         ;// Q14
6870c1bc742181ded4930842b46e9507372f0b1b963James DongqXjt            QN Tmp.S16          ;// Q15
6880c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Doubleword views: lo = D(2n), hi = D(2n+1) of the matching Qn
6890c1bc742181ded4930842b46e9507372f0b1b963James DongdXj0lo          DN (Src0*2).S16     ;// D14
6900c1bc742181ded4930842b46e9507372f0b1b963James DongdXj0hi          DN (Src0*2+1).S16   ;// D15
6910c1bc742181ded4930842b46e9507372f0b1b963James DongdXj1lo          DN (Src1*2).S16     ;// D16
6920c1bc742181ded4930842b46e9507372f0b1b963James DongdXj1hi          DN (Src1*2+1).S16   ;// D17
6930c1bc742181ded4930842b46e9507372f0b1b963James DongdXj2lo          DN (Src2*2).S16     ;// D18
6940c1bc742181ded4930842b46e9507372f0b1b963James DongdXj2hi          DN (Src2*2+1).S16   ;// D19
6950c1bc742181ded4930842b46e9507372f0b1b963James DongdXj3lo          DN (Src3*2).S16     ;// D20
6960c1bc742181ded4930842b46e9507372f0b1b963James DongdXj3hi          DN (Src3*2+1).S16   ;// D21
6970c1bc742181ded4930842b46e9507372f0b1b963James DongdXj4lo          DN (Src4*2).S16     ;// D22
6980c1bc742181ded4930842b46e9507372f0b1b963James DongdXj4hi          DN (Src4*2+1).S16   ;// D23
6990c1bc742181ded4930842b46e9507372f0b1b963James DongdXj5lo          DN (Src5*2).S16     ;// D24
7000c1bc742181ded4930842b46e9507372f0b1b963James DongdXj5hi          DN (Src5*2+1).S16   ;// D25
7010c1bc742181ded4930842b46e9507372f0b1b963James DongdXj6lo          DN (Src6*2).S16     ;// D26
7020c1bc742181ded4930842b46e9507372f0b1b963James DongdXj6hi          DN (Src6*2+1).S16   ;// D27
7030c1bc742181ded4930842b46e9507372f0b1b963James DongdXj7lo          DN (Src7*2).S16     ;// D28
7040c1bc742181ded4930842b46e9507372f0b1b963James DongdXj7hi          DN (Src7*2+1).S16   ;// D29
7050c1bc742181ded4930842b46e9507372f0b1b963James DongdXjtlo          DN (Tmp*2).S16      ;// D30
7060c1bc742181ded4930842b46e9507372f0b1b963James DongdXjthi          DN (Tmp*2+1).S16    ;// D31
7070c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// "i" stage register names.  Each is an alias of one j-stage register,
;// so the i-stage values live in the same nine Q registers (Q7..Q15) in a
;// permuted order; no additional registers are introduced.  The trailing
;// comments give the physical register each alias resolves to.
;// Presumably the permutation lets a stage write its result over an input
;// that is no longer needed - the instruction sequence that relies on it
;// is not in this section.
7080c1bc742181ded4930842b46e9507372f0b1b963James DongqXi0            QN qXj0             ;// Q7
7090c1bc742181ded4930842b46e9507372f0b1b963James DongqXi1            QN qXj4             ;// Q11
7100c1bc742181ded4930842b46e9507372f0b1b963James DongqXi2            QN qXj2             ;// Q9
7110c1bc742181ded4930842b46e9507372f0b1b963James DongqXi3            QN qXj7             ;// Q14
7120c1bc742181ded4930842b46e9507372f0b1b963James DongqXi4            QN qXj5             ;// Q12
7130c1bc742181ded4930842b46e9507372f0b1b963James DongqXi5            QN qXjt             ;// Q15
7140c1bc742181ded4930842b46e9507372f0b1b963James DongqXi6            QN qXj1             ;// Q8
7150c1bc742181ded4930842b46e9507372f0b1b963James DongqXi7            QN qXj6             ;// Q13
7160c1bc742181ded4930842b46e9507372f0b1b963James DongqXit            QN qXj3             ;// Q10
7170c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Doubleword halves; each pair follows the same j-stage register as the
;// qXi alias of the same index.
7180c1bc742181ded4930842b46e9507372f0b1b963James DongdXi0lo          DN dXj0lo
7190c1bc742181ded4930842b46e9507372f0b1b963James DongdXi0hi          DN dXj0hi
7200c1bc742181ded4930842b46e9507372f0b1b963James DongdXi1lo          DN dXj4lo
7210c1bc742181ded4930842b46e9507372f0b1b963James DongdXi1hi          DN dXj4hi
7220c1bc742181ded4930842b46e9507372f0b1b963James DongdXi2lo          DN dXj2lo
7230c1bc742181ded4930842b46e9507372f0b1b963James DongdXi2hi          DN dXj2hi
7240c1bc742181ded4930842b46e9507372f0b1b963James DongdXi3lo          DN dXj7lo
7250c1bc742181ded4930842b46e9507372f0b1b963James DongdXi3hi          DN dXj7hi
7260c1bc742181ded4930842b46e9507372f0b1b963James DongdXi4lo          DN dXj5lo
7270c1bc742181ded4930842b46e9507372f0b1b963James DongdXi4hi          DN dXj5hi
7280c1bc742181ded4930842b46e9507372f0b1b963James DongdXi5lo          DN dXjtlo
7290c1bc742181ded4930842b46e9507372f0b1b963James DongdXi5hi          DN dXjthi
7300c1bc742181ded4930842b46e9507372f0b1b963James DongdXi6lo          DN dXj1lo
7310c1bc742181ded4930842b46e9507372f0b1b963James DongdXi6hi          DN dXj1hi
7320c1bc742181ded4930842b46e9507372f0b1b963James DongdXi7lo          DN dXj6lo
7330c1bc742181ded4930842b46e9507372f0b1b963James DongdXi7hi          DN dXj6hi
7340c1bc742181ded4930842b46e9507372f0b1b963James DongdXitlo          DN dXj3lo
7350c1bc742181ded4930842b46e9507372f0b1b963James DongdXithi          DN dXj3hi
7360c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// "h" stage register names.  Each is an alias of one i-stage register,
;// which in turn aliases a j-stage register, so the h-stage values again
;// occupy Q7..Q15 in a further-permuted order.  The trailing comments give
;// the physical register each alias resolves to through that chain.
7370c1bc742181ded4930842b46e9507372f0b1b963James DongqXh0            QN qXit             ;// Q10
7380c1bc742181ded4930842b46e9507372f0b1b963James DongqXh1            QN qXi0             ;// Q7
7390c1bc742181ded4930842b46e9507372f0b1b963James DongqXh2            QN qXi2             ;// Q9
7400c1bc742181ded4930842b46e9507372f0b1b963James DongqXh3            QN qXi3             ;// Q14
7410c1bc742181ded4930842b46e9507372f0b1b963James DongqXh4            QN qXi7             ;// Q13
7420c1bc742181ded4930842b46e9507372f0b1b963James DongqXh5            QN qXi5             ;// Q15
7430c1bc742181ded4930842b46e9507372f0b1b963James DongqXh6            QN qXi4             ;// Q12
7440c1bc742181ded4930842b46e9507372f0b1b963James DongqXh7            QN qXi1             ;// Q11
7450c1bc742181ded4930842b46e9507372f0b1b963James DongqXht            QN qXi6             ;// Q8
7460c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Doubleword halves; each pair follows the same i-stage register as the
;// qXh alias of the same index.
7470c1bc742181ded4930842b46e9507372f0b1b963James DongdXh0lo          DN dXitlo
7480c1bc742181ded4930842b46e9507372f0b1b963James DongdXh0hi          DN dXithi
7490c1bc742181ded4930842b46e9507372f0b1b963James DongdXh1lo          DN dXi0lo
7500c1bc742181ded4930842b46e9507372f0b1b963James DongdXh1hi          DN dXi0hi
7510c1bc742181ded4930842b46e9507372f0b1b963James DongdXh2lo          DN dXi2lo
7520c1bc742181ded4930842b46e9507372f0b1b963James DongdXh2hi          DN dXi2hi
7530c1bc742181ded4930842b46e9507372f0b1b963James DongdXh3lo          DN dXi3lo
7540c1bc742181ded4930842b46e9507372f0b1b963James DongdXh3hi          DN dXi3hi
7550c1bc742181ded4930842b46e9507372f0b1b963James DongdXh4lo          DN dXi7lo
7560c1bc742181ded4930842b46e9507372f0b1b963James DongdXh4hi          DN dXi7hi
7570c1bc742181ded4930842b46e9507372f0b1b963James DongdXh5lo          DN dXi5lo
7580c1bc742181ded4930842b46e9507372f0b1b963James DongdXh5hi          DN dXi5hi
7590c1bc742181ded4930842b46e9507372f0b1b963James DongdXh6lo          DN dXi4lo
7600c1bc742181ded4930842b46e9507372f0b1b963James DongdXh6hi          DN dXi4hi
7610c1bc742181ded4930842b46e9507372f0b1b963James DongdXh7lo          DN dXi1lo
7620c1bc742181ded4930842b46e9507372f0b1b963James DongdXh7hi          DN dXi1hi
7630c1bc742181ded4930842b46e9507372f0b1b963James DongdXhtlo          DN dXi6lo
7640c1bc742181ded4930842b46e9507372f0b1b963James DongdXhthi          DN dXi6hi
7650c1bc742181ded4930842b46e9507372f0b1b963James Dong
7660c1bc742181ded4930842b46e9507372f0b1b963James DongqXg0            QN qXh2
7670c1bc742181ded4930842b46e9507372f0b1b963James DongqXg1            QN qXht
7680c1bc742181ded4930842b46e9507372f0b1b963James DongqXg2            QN qXh1
7690c1bc742181ded4930842b46e9507372f0b1b963James DongqXg3            QN qXh0
7700c1bc742181ded4930842b46e9507372f0b1b963James DongqXg4            QN qXh4
7710c1bc742181ded4930842b46e9507372f0b1b963James DongqXg5            QN qXh5
7720c1bc742181ded4930842b46e9507372f0b1b963James DongqXg6            QN qXh6
7730c1bc742181ded4930842b46e9507372f0b1b963James DongqXg7            QN qXh7
7740c1bc742181ded4930842b46e9507372f0b1b963James DongqXgt            QN qXh3
7750c1bc742181ded4930842b46e9507372f0b1b963James Dong
7760c1bc742181ded4930842b46e9507372f0b1b963James DongqXf0            QN qXg6
7770c1bc742181ded4930842b46e9507372f0b1b963James DongqXf1            QN qXg5
7780c1bc742181ded4930842b46e9507372f0b1b963James DongqXf2            QN qXg4
7790c1bc742181ded4930842b46e9507372f0b1b963James DongqXf3            QN qXgt
7800c1bc742181ded4930842b46e9507372f0b1b963James DongqXf4            QN qXg3
7810c1bc742181ded4930842b46e9507372f0b1b963James DongqXf5            QN qXg2
7820c1bc742181ded4930842b46e9507372f0b1b963James DongqXf6            QN qXg1
7830c1bc742181ded4930842b46e9507372f0b1b963James DongqXf7            QN qXg0
7840c1bc742181ded4930842b46e9507372f0b1b963James DongqXft            QN qXg7
7850c1bc742181ded4930842b46e9507372f0b1b963James Dong
7860c1bc742181ded4930842b46e9507372f0b1b963James Dong
7870c1bc742181ded4930842b46e9507372f0b1b963James DongqXt0            QN 1.S32
7880c1bc742181ded4930842b46e9507372f0b1b963James DongqXt1            QN 2.S32
7890c1bc742181ded4930842b46e9507372f0b1b963James DongqT0lo           QN 1.S32
7900c1bc742181ded4930842b46e9507372f0b1b963James DongqT0hi           QN 2.S32
7910c1bc742181ded4930842b46e9507372f0b1b963James DongqT1lo           QN 3.S32
7920c1bc742181ded4930842b46e9507372f0b1b963James DongqT1hi           QN 4.S32
7930c1bc742181ded4930842b46e9507372f0b1b963James DongqScalelo        QN 5.S32        ;// used to read post scale values
7940c1bc742181ded4930842b46e9507372f0b1b963James DongqScalehi        QN 6.S32
7950c1bc742181ded4930842b46e9507372f0b1b963James DongqTemp0          QN 5.S32
7960c1bc742181ded4930842b46e9507372f0b1b963James DongqTemp1          QN 6.S32
7970c1bc742181ded4930842b46e9507372f0b1b963James Dong
7980c1bc742181ded4930842b46e9507372f0b1b963James Dong
7990c1bc742181ded4930842b46e9507372f0b1b963James DongScale1          EQU 6
8000c1bc742181ded4930842b46e9507372f0b1b963James DongScale2          EQU 15
8010c1bc742181ded4930842b46e9507372f0b1b963James DongqScale1         QN Scale1.S16
8020c1bc742181ded4930842b46e9507372f0b1b963James DongqScale2         QN Scale2.S16
8030c1bc742181ded4930842b46e9507372f0b1b963James DongdScale1lo       DN (Scale1*2).S16
8040c1bc742181ded4930842b46e9507372f0b1b963James DongdScale1hi       DN (Scale1*2+1).S16
8050c1bc742181ded4930842b46e9507372f0b1b963James DongdScale2lo       DN (Scale2*2).S16
8060c1bc742181ded4930842b46e9507372f0b1b963James DongdScale2hi       DN (Scale2*2+1).S16
8070c1bc742181ded4930842b46e9507372f0b1b963James Dong
8080c1bc742181ded4930842b46e9507372f0b1b963James DongdCoefs          DN 0.S16        ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
8090c1bc742181ded4930842b46e9507372f0b1b963James DongInvSqrt2        DN dCoefs[0]    ;// 1/sqrt(2) in Q15
8100c1bc742181ded4930842b46e9507372f0b1b963James DongS               DN dCoefs[1]    ;// Sin(PI/8) in Q15
8110c1bc742181ded4930842b46e9507372f0b1b963James DongC               DN dCoefs[2]    ;// Cos(PI/8) in Q15
8120c1bc742181ded4930842b46e9507372f0b1b963James Dong
8130c1bc742181ded4930842b46e9507372f0b1b963James DongpTemp           RN 12
8140c1bc742181ded4930842b46e9507372f0b1b963James Dong
8150c1bc742181ded4930842b46e9507372f0b1b963James Dong
8160c1bc742181ded4930842b46e9507372f0b1b963James Dong        IMPORT  armCOMM_IDCTCoef
8170c1bc742181ded4930842b46e9507372f0b1b963James Dong
8180c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        {qXj0,qXj1}, [pSrc @64]!
8190c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        {qXj2,qXj3}, [pSrc @64]!
8200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        {qXj4,qXj5}, [pSrc @64]!
8210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        {qXj6,qXj7}, [pSrc @64]!
8220c1bc742181ded4930842b46e9507372f0b1b963James Dong
8230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load PreScale and multiply with Src
8240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4
8250c1bc742181ded4930842b46e9507372f0b1b963James Dong
8260c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$inscale"="s16"                         ;// 16X16 Mul
8270c1bc742181ded4930842b46e9507372f0b1b963James Dong            M_IDCT_PRESCALE16
8280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
8290c1bc742181ded4930842b46e9507372f0b1b963James Dong
8300c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$inscale"="s32"                         ;// 32X32 Mul
8310c1bc742181ded4930842b46e9507372f0b1b963James Dong            M_IDCT_PRESCALE32
8320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
8330c1bc742181ded4930842b46e9507372f0b1b963James Dong
8340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 3
8350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQDMULH     qXi2, qXi2, InvSqrt2            ;// i2/sqrt(2)
8360c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXh0, qXi0, qXi1                ;// (i0+i1)/2
8370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB       qXh1, qXi0, qXi1                ;// (i0-i1)/2
8380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXh7, qXi5, qXi7                ;// (i5+i7)/4
8390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXh5, qXi5, qXi7                ;// (i5-i7)/2
8400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQDMULH     qXh5, qXh5, InvSqrt2            ;// h5/sqrt(2)
8410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXh2, qXi2, qXi3                ;// h2, h3
8420c1bc742181ded4930842b46e9507372f0b1b963James Dong
8430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt0, dXi4lo, C                 ;// c*i4
8440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLAL       qXt0, dXi6lo, S                 ;// c*i4+s*i6
8450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt1, dXi4hi, C                 ;// same sum for the hi operands
8460c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLAL       qXt1, dXi6hi, S
8470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dXh4lo, qXt0, #16               ;// h4 = (c*i4 + s*i6) >> 16, narrowed
8480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dXh4hi, qXt1, #16
8490c1bc742181ded4930842b46e9507372f0b1b963James Dong
8500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt0, dXi6lo, C                 ;// c*i6
8510c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLSL       qXt0, dXi4lo, S                 ;// c*i6 - s*i4
8520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt1, dXi6hi, C                 ;// same difference for the hi operands
8530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLSL       qXt1, dXi4hi, S
8540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dXh6lo, qXt0, #16               ;// h6 = (c*i6 - s*i4) >> 16, narrowed
8550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dXh6hi, qXt1, #16
8560c1bc742181ded4930842b46e9507372f0b1b963James Dong
8570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2
8580c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXg6, qXh6, qXh7
8590c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXg5, qXh5, qXg6
8600c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXg4, qXh4, qXg5
8610c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXg1, qXh1, qXh2        ;// (h1+h2)/2
8620c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB       qXg2, qXh1, qXh2        ;// (h1-h2)/2
8630c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXg0, qXh0, qXh3        ;// (h0+h3)/2
8640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB       qXg3, qXh0, qXh3        ;// (h0-h3)/2
8650c1bc742181ded4930842b46e9507372f0b1b963James Dong
8660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 1 all rows
8670c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        qXf3, qXg3, qXg4
8680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXf4, qXg3, qXg4
8690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        qXf2, qXg2, qXg5
8700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXf5, qXg2, qXg5
8710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        qXf1, qXg1, qXg6
8720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXf6, qXg1, qXg6
8730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        qXf0, qXg0, qXg7
8740c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXf7, qXg0, qXg7
8750c1bc742181ded4930842b46e9507372f0b1b963James Dong
8760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Transpose, store and loop
8770c1bc742181ded4930842b46e9507372f0b1b963James DongXTR0            EQU Src5
8780c1bc742181ded4930842b46e9507372f0b1b963James DongXTR1            EQU Tmp
8790c1bc742181ded4930842b46e9507372f0b1b963James DongXTR2            EQU Src6
8800c1bc742181ded4930842b46e9507372f0b1b963James DongXTR3            EQU Src7
8810c1bc742181ded4930842b46e9507372f0b1b963James DongXTR4            EQU Src3
8820c1bc742181ded4930842b46e9507372f0b1b963James DongXTR5            EQU Src0
8830c1bc742181ded4930842b46e9507372f0b1b963James DongXTR6            EQU Src1
8840c1bc742181ded4930842b46e9507372f0b1b963James DongXTR7            EQU Src2
8850c1bc742181ded4930842b46e9507372f0b1b963James DongXTRt            EQU Src4
8860c1bc742181ded4930842b46e9507372f0b1b963James Dong
8870c1bc742181ded4930842b46e9507372f0b1b963James DongqA0             QN  XTR0.S32  ;// S32 views of the XTR registers, used for the transpose
8880c1bc742181ded4930842b46e9507372f0b1b963James DongqA1             QN  XTR1.S32
8890c1bc742181ded4930842b46e9507372f0b1b963James DongqA2             QN  XTR2.S32
8900c1bc742181ded4930842b46e9507372f0b1b963James DongqA3             QN  XTR3.S32
8910c1bc742181ded4930842b46e9507372f0b1b963James DongqA4             QN  XTR4.S32
8920c1bc742181ded4930842b46e9507372f0b1b963James DongqA5             QN  XTR5.S32
8930c1bc742181ded4930842b46e9507372f0b1b963James DongqA6             QN  XTR6.S32
8940c1bc742181ded4930842b46e9507372f0b1b963James DongqA7             QN  XTR7.S32
8950c1bc742181ded4930842b46e9507372f0b1b963James Dong
8960c1bc742181ded4930842b46e9507372f0b1b963James DongdB0             DN  XTR0*2+1      ;// for using VSWP
8970c1bc742181ded4930842b46e9507372f0b1b963James DongdB1             DN  XTR1*2+1
8980c1bc742181ded4930842b46e9507372f0b1b963James DongdB2             DN  XTR2*2+1
8990c1bc742181ded4930842b46e9507372f0b1b963James DongdB3             DN  XTR3*2+1
9000c1bc742181ded4930842b46e9507372f0b1b963James DongdB4             DN  XTR4*2
9010c1bc742181ded4930842b46e9507372f0b1b963James DongdB5             DN  XTR5*2
9020c1bc742181ded4930842b46e9507372f0b1b963James DongdB6             DN  XTR6*2
9030c1bc742181ded4930842b46e9507372f0b1b963James DongdB7             DN  XTR7*2
9040c1bc742181ded4930842b46e9507372f0b1b963James Dong
9050c1bc742181ded4930842b46e9507372f0b1b963James Dong
9060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qXf0, qXf1
9070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qXf2, qXf3
9080c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qXf4, qXf5
9090c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qXf6, qXf7
9100c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qA0, qA2
9110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qA1, qA3
9120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qA4, qA6
9130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qA5, qA7
9140c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dB0, dB4
9150c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dB1, dB5
9160c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dB2, dB6
9170c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dB3, dB7
9180c1bc742181ded4930842b46e9507372f0b1b963James Dong
9190c1bc742181ded4930842b46e9507372f0b1b963James Dong
9200c1bc742181ded4930842b46e9507372f0b1b963James DongqYj0            QN qXf0
9210c1bc742181ded4930842b46e9507372f0b1b963James DongqYj1            QN qXf1
9220c1bc742181ded4930842b46e9507372f0b1b963James DongqYj2            QN qXf2
9230c1bc742181ded4930842b46e9507372f0b1b963James DongqYj3            QN qXf3
9240c1bc742181ded4930842b46e9507372f0b1b963James DongqYj4            QN qXf4
9250c1bc742181ded4930842b46e9507372f0b1b963James DongqYj5            QN qXf5
9260c1bc742181ded4930842b46e9507372f0b1b963James DongqYj6            QN qXf6
9270c1bc742181ded4930842b46e9507372f0b1b963James DongqYj7            QN qXf7
9280c1bc742181ded4930842b46e9507372f0b1b963James DongqYjt            QN qXft
9290c1bc742181ded4930842b46e9507372f0b1b963James Dong
9300c1bc742181ded4930842b46e9507372f0b1b963James DongdYj0lo          DN (XTR0*2).S16
9310c1bc742181ded4930842b46e9507372f0b1b963James DongdYj0hi          DN (XTR0*2+1).S16
9320c1bc742181ded4930842b46e9507372f0b1b963James DongdYj1lo          DN (XTR1*2).S16
9330c1bc742181ded4930842b46e9507372f0b1b963James DongdYj1hi          DN (XTR1*2+1).S16
9340c1bc742181ded4930842b46e9507372f0b1b963James DongdYj2lo          DN (XTR2*2).S16
9350c1bc742181ded4930842b46e9507372f0b1b963James DongdYj2hi          DN (XTR2*2+1).S16
9360c1bc742181ded4930842b46e9507372f0b1b963James DongdYj3lo          DN (XTR3*2).S16
9370c1bc742181ded4930842b46e9507372f0b1b963James DongdYj3hi          DN (XTR3*2+1).S16
9380c1bc742181ded4930842b46e9507372f0b1b963James DongdYj4lo          DN (XTR4*2).S16
9390c1bc742181ded4930842b46e9507372f0b1b963James DongdYj4hi          DN (XTR4*2+1).S16
9400c1bc742181ded4930842b46e9507372f0b1b963James DongdYj5lo          DN (XTR5*2).S16
9410c1bc742181ded4930842b46e9507372f0b1b963James DongdYj5hi          DN (XTR5*2+1).S16
9420c1bc742181ded4930842b46e9507372f0b1b963James DongdYj6lo          DN (XTR6*2).S16
9430c1bc742181ded4930842b46e9507372f0b1b963James DongdYj6hi          DN (XTR6*2+1).S16
9440c1bc742181ded4930842b46e9507372f0b1b963James DongdYj7lo          DN (XTR7*2).S16
9450c1bc742181ded4930842b46e9507372f0b1b963James DongdYj7hi          DN (XTR7*2+1).S16
9460c1bc742181ded4930842b46e9507372f0b1b963James DongdYjtlo          DN (XTRt*2).S16
9470c1bc742181ded4930842b46e9507372f0b1b963James DongdYjthi          DN (XTRt*2+1).S16
9480c1bc742181ded4930842b46e9507372f0b1b963James Dong
9490c1bc742181ded4930842b46e9507372f0b1b963James DongqYi0            QN qYj0
9500c1bc742181ded4930842b46e9507372f0b1b963James DongqYi1            QN qYj4
9510c1bc742181ded4930842b46e9507372f0b1b963James DongqYi2            QN qYj2
9520c1bc742181ded4930842b46e9507372f0b1b963James DongqYi3            QN qYj7
9530c1bc742181ded4930842b46e9507372f0b1b963James DongqYi4            QN qYj5
9540c1bc742181ded4930842b46e9507372f0b1b963James DongqYi5            QN qYjt
9550c1bc742181ded4930842b46e9507372f0b1b963James DongqYi6            QN qYj1
9560c1bc742181ded4930842b46e9507372f0b1b963James DongqYi7            QN qYj6
9570c1bc742181ded4930842b46e9507372f0b1b963James DongqYit            QN qYj3
9580c1bc742181ded4930842b46e9507372f0b1b963James Dong
9590c1bc742181ded4930842b46e9507372f0b1b963James DongdYi0lo          DN dYj0lo
9600c1bc742181ded4930842b46e9507372f0b1b963James DongdYi0hi          DN dYj0hi
9610c1bc742181ded4930842b46e9507372f0b1b963James DongdYi1lo          DN dYj4lo
9620c1bc742181ded4930842b46e9507372f0b1b963James DongdYi1hi          DN dYj4hi
9630c1bc742181ded4930842b46e9507372f0b1b963James DongdYi2lo          DN dYj2lo
9640c1bc742181ded4930842b46e9507372f0b1b963James DongdYi2hi          DN dYj2hi
9650c1bc742181ded4930842b46e9507372f0b1b963James DongdYi3lo          DN dYj7lo
9660c1bc742181ded4930842b46e9507372f0b1b963James DongdYi3hi          DN dYj7hi
9670c1bc742181ded4930842b46e9507372f0b1b963James DongdYi4lo          DN dYj5lo
9680c1bc742181ded4930842b46e9507372f0b1b963James DongdYi4hi          DN dYj5hi
9690c1bc742181ded4930842b46e9507372f0b1b963James DongdYi5lo          DN dYjtlo
9700c1bc742181ded4930842b46e9507372f0b1b963James DongdYi5hi          DN dYjthi
9710c1bc742181ded4930842b46e9507372f0b1b963James DongdYi6lo          DN dYj1lo
9720c1bc742181ded4930842b46e9507372f0b1b963James DongdYi6hi          DN dYj1hi
9730c1bc742181ded4930842b46e9507372f0b1b963James DongdYi7lo          DN dYj6lo
9740c1bc742181ded4930842b46e9507372f0b1b963James DongdYi7hi          DN dYj6hi
9750c1bc742181ded4930842b46e9507372f0b1b963James DongdYitlo          DN dYj3lo
9760c1bc742181ded4930842b46e9507372f0b1b963James DongdYithi          DN dYj3hi
9770c1bc742181ded4930842b46e9507372f0b1b963James Dong
9780c1bc742181ded4930842b46e9507372f0b1b963James DongqYh0            QN qYit
9790c1bc742181ded4930842b46e9507372f0b1b963James DongqYh1            QN qYi0
9800c1bc742181ded4930842b46e9507372f0b1b963James DongqYh2            QN qYi2
9810c1bc742181ded4930842b46e9507372f0b1b963James DongqYh3            QN qYi3
9820c1bc742181ded4930842b46e9507372f0b1b963James DongqYh4            QN qYi7
9830c1bc742181ded4930842b46e9507372f0b1b963James DongqYh5            QN qYi5
9840c1bc742181ded4930842b46e9507372f0b1b963James DongqYh6            QN qYi4
9850c1bc742181ded4930842b46e9507372f0b1b963James DongqYh7            QN qYi1
9860c1bc742181ded4930842b46e9507372f0b1b963James DongqYht            QN qYi6
9870c1bc742181ded4930842b46e9507372f0b1b963James Dong
9880c1bc742181ded4930842b46e9507372f0b1b963James DongdYh0lo          DN dYitlo
9890c1bc742181ded4930842b46e9507372f0b1b963James DongdYh0hi          DN dYithi
9900c1bc742181ded4930842b46e9507372f0b1b963James DongdYh1lo          DN dYi0lo
9910c1bc742181ded4930842b46e9507372f0b1b963James DongdYh1hi          DN dYi0hi
9920c1bc742181ded4930842b46e9507372f0b1b963James DongdYh2lo          DN dYi2lo
9930c1bc742181ded4930842b46e9507372f0b1b963James DongdYh2hi          DN dYi2hi
9940c1bc742181ded4930842b46e9507372f0b1b963James DongdYh3lo          DN dYi3lo
9950c1bc742181ded4930842b46e9507372f0b1b963James DongdYh3hi          DN dYi3hi
9960c1bc742181ded4930842b46e9507372f0b1b963James DongdYh4lo          DN dYi7lo
9970c1bc742181ded4930842b46e9507372f0b1b963James DongdYh4hi          DN dYi7hi
9980c1bc742181ded4930842b46e9507372f0b1b963James DongdYh5lo          DN dYi5lo
9990c1bc742181ded4930842b46e9507372f0b1b963James DongdYh5hi          DN dYi5hi
10000c1bc742181ded4930842b46e9507372f0b1b963James DongdYh6lo          DN dYi4lo
10010c1bc742181ded4930842b46e9507372f0b1b963James DongdYh6hi          DN dYi4hi
10020c1bc742181ded4930842b46e9507372f0b1b963James DongdYh7lo          DN dYi1lo
10030c1bc742181ded4930842b46e9507372f0b1b963James DongdYh7hi          DN dYi1hi
10040c1bc742181ded4930842b46e9507372f0b1b963James DongdYhtlo          DN dYi6lo
10050c1bc742181ded4930842b46e9507372f0b1b963James DongdYhthi          DN dYi6hi
10060c1bc742181ded4930842b46e9507372f0b1b963James Dong
10070c1bc742181ded4930842b46e9507372f0b1b963James DongqYg0            QN qYh2
10080c1bc742181ded4930842b46e9507372f0b1b963James DongqYg1            QN qYht
10090c1bc742181ded4930842b46e9507372f0b1b963James DongqYg2            QN qYh1
10100c1bc742181ded4930842b46e9507372f0b1b963James DongqYg3            QN qYh0
10110c1bc742181ded4930842b46e9507372f0b1b963James DongqYg4            QN qYh4
10120c1bc742181ded4930842b46e9507372f0b1b963James DongqYg5            QN qYh5
10130c1bc742181ded4930842b46e9507372f0b1b963James DongqYg6            QN qYh6
10140c1bc742181ded4930842b46e9507372f0b1b963James DongqYg7            QN qYh7
10150c1bc742181ded4930842b46e9507372f0b1b963James DongqYgt            QN qYh3
10160c1bc742181ded4930842b46e9507372f0b1b963James Dong
10170c1bc742181ded4930842b46e9507372f0b1b963James DongqYf0            QN qYg6
10180c1bc742181ded4930842b46e9507372f0b1b963James DongqYf1            QN qYg5
10190c1bc742181ded4930842b46e9507372f0b1b963James DongqYf2            QN qYg4
10200c1bc742181ded4930842b46e9507372f0b1b963James DongqYf3            QN qYgt
10210c1bc742181ded4930842b46e9507372f0b1b963James DongqYf4            QN qYg3
10220c1bc742181ded4930842b46e9507372f0b1b963James DongqYf5            QN qYg2
10230c1bc742181ded4930842b46e9507372f0b1b963James DongqYf6            QN qYg1
10240c1bc742181ded4930842b46e9507372f0b1b963James DongqYf7            QN qYg0
10250c1bc742181ded4930842b46e9507372f0b1b963James DongqYft            QN qYg7
10260c1bc742181ded4930842b46e9507372f0b1b963James Dong
10270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR       qYj7, qYj7, #2
10280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR       qYj6, qYj6, #1
10290c1bc742181ded4930842b46e9507372f0b1b963James Dong
10300c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYi5, qYj1, qYj7        ;// i5 = (j1+j7)/2
10310c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYi6, qYj1, qYj7        ;// i6 = j1-j7
10320c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYi3, qYj2, qYj6        ;// i3 = (j2+j6)/2
10330c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYi2, qYj2, qYj6        ;// i2 = j2-j6
10340c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYi7, qYj5, qYj3        ;// i7 = (j5+j3)/2
10350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYi4, qYj5, qYj3        ;// i4 = j5-j3
10360c1bc742181ded4930842b46e9507372f0b1b963James Dong
10370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQDMULH     qYi2, qYi2, InvSqrt2    ;// i2/sqrt(2)
10380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 4,3 rows 0to1 x 1/2
10390c1bc742181ded4930842b46e9507372f0b1b963James Dong
10400c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pTemp, #0x4             ;// ensure correct round
10410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP        qScale1, pTemp           ;// of DC result
10420c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD        qYi0, qYi0, qScale1
10430c1bc742181ded4930842b46e9507372f0b1b963James Dong
10440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYh0, qYi0, qYi1        ;// (i0+i1)/2
10450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB       qYh1, qYi0, qYi1        ;// (i0-i1)/2
10460c1bc742181ded4930842b46e9507372f0b1b963James Dong
10470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYh7, qYi5, qYi7        ;// (i5+i7)/4
10480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYh5, qYi5, qYi7        ;// (i5-i7)/2
10490c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYh2, qYi2, qYi3        ;// h2, h3
10500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQDMULH     qYh5, qYh5, InvSqrt2    ;// h5/sqrt(2)
10510c1bc742181ded4930842b46e9507372f0b1b963James Dong
10520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt0, dYi4lo, C         ;// c*i4
10530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLAL       qXt0, dYi6lo, S         ;// c*i4+s*i6
10540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt1, dYi4hi, C         ;// same sum for the hi operands
10550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLAL       qXt1, dYi6hi, S
10560c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dYh4lo, qXt0, #16       ;// h4 = (c*i4 + s*i6) >> 16, narrowed
10570c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dYh4hi, qXt1, #16
10580c1bc742181ded4930842b46e9507372f0b1b963James Dong
10590c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt0, dYi6lo, C         ;// c*i6
10600c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLSL       qXt0, dYi4lo, S         ;// c*i6 - s*i4
10610c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qXt1, dYi6hi, C         ;// same difference for the hi operands
10620c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLSL       qXt1, dYi4hi, S
10630c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dYh6lo, qXt0, #16       ;// h6 = (c*i6 - s*i4) >> 16, narrowed
10640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHRN       dYh6hi, qXt1, #16
10650c1bc742181ded4930842b46e9507372f0b1b963James Dong
10660c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYg6, qYh6, qYh7
10670c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYg5, qYh5, qYg6
10680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qYg4, qYh4, qYg5
10690c1bc742181ded4930842b46e9507372f0b1b963James Dong
10700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 2 rows 0to3 x 1/2
10710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYg1, qYh1, qYh2        ;// (h1+h2)/2
10720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB       qYg2, qYh1, qYh2        ;// (h1-h2)/2
10730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qYg0, qYh0, qYh3        ;// (h0+h3)/2
10740c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB       qYg3, qYh0, qYh3        ;// (h0-h3)/2
10750c1bc742181ded4930842b46e9507372f0b1b963James Dong
10760c1bc742181ded4930842b46e9507372f0b1b963James Dong
10770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// IStage 1 all rows
10780c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD        qYf3, qYg3, qYg4
10790c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB        qYf4, qYg3, qYg4
10800c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD        qYf2, qYg2, qYg5
10810c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB        qYf5, qYg2, qYg5
10820c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD        qYf1, qYg1, qYg6
10830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB        qYf6, qYg1, qYg6
10840c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD        qYf0, qYg0, qYg7
10850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHSUB        qYf7, qYg0, qYg7
10860c1bc742181ded4930842b46e9507372f0b1b963James Dong
10870c1bc742181ded4930842b46e9507372f0b1b963James DongYTR0            EQU Src0
10880c1bc742181ded4930842b46e9507372f0b1b963James DongYTR1            EQU Src4
10890c1bc742181ded4930842b46e9507372f0b1b963James DongYTR2            EQU Src1
10900c1bc742181ded4930842b46e9507372f0b1b963James DongYTR3            EQU Src2
10910c1bc742181ded4930842b46e9507372f0b1b963James DongYTR4            EQU Src7
10920c1bc742181ded4930842b46e9507372f0b1b963James DongYTR5            EQU Src5
10930c1bc742181ded4930842b46e9507372f0b1b963James DongYTR6            EQU Tmp
10940c1bc742181ded4930842b46e9507372f0b1b963James DongYTR7            EQU Src6
10950c1bc742181ded4930842b46e9507372f0b1b963James DongYTRt            EQU Src3
10960c1bc742181ded4930842b46e9507372f0b1b963James Dong
10970c1bc742181ded4930842b46e9507372f0b1b963James DongqC0             QN  YTR0.S32                ;// S32 views of the YTR registers, used for the transpose
10980c1bc742181ded4930842b46e9507372f0b1b963James DongqC1             QN  YTR1.S32
10990c1bc742181ded4930842b46e9507372f0b1b963James DongqC2             QN  YTR2.S32
11000c1bc742181ded4930842b46e9507372f0b1b963James DongqC3             QN  YTR3.S32
11010c1bc742181ded4930842b46e9507372f0b1b963James DongqC4             QN  YTR4.S32
11020c1bc742181ded4930842b46e9507372f0b1b963James DongqC5             QN  YTR5.S32
11030c1bc742181ded4930842b46e9507372f0b1b963James DongqC6             QN  YTR6.S32
11040c1bc742181ded4930842b46e9507372f0b1b963James DongqC7             QN  YTR7.S32
11050c1bc742181ded4930842b46e9507372f0b1b963James Dong
11060c1bc742181ded4930842b46e9507372f0b1b963James DongdD0             DN  YTR0*2+1                ;// for using VSWP
11070c1bc742181ded4930842b46e9507372f0b1b963James DongdD1             DN  YTR1*2+1
11080c1bc742181ded4930842b46e9507372f0b1b963James DongdD2             DN  YTR2*2+1
11090c1bc742181ded4930842b46e9507372f0b1b963James DongdD3             DN  YTR3*2+1
11100c1bc742181ded4930842b46e9507372f0b1b963James DongdD4             DN  YTR4*2
11110c1bc742181ded4930842b46e9507372f0b1b963James DongdD5             DN  YTR5*2
11120c1bc742181ded4930842b46e9507372f0b1b963James DongdD6             DN  YTR6*2
11130c1bc742181ded4930842b46e9507372f0b1b963James DongdD7             DN  YTR7*2
11140c1bc742181ded4930842b46e9507372f0b1b963James Dong
11150c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qYf0, qYf1
11160c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qYf2, qYf3
11170c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qYf4, qYf5
11180c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qYf6, qYf7
11190c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qC0, qC2
11200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qC1, qC3
11210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qC4, qC6
11220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VTRN        qC5, qC7
11230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dD0, dD4
11240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dD1, dD5
11250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dD2, dD6
11260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSWP        dD3, dD7
11270c1bc742181ded4930842b46e9507372f0b1b963James Dong
11280c1bc742181ded4930842b46e9507372f0b1b963James Dong
11290c1bc742181ded4930842b46e9507372f0b1b963James DongdYf0U8          DN YTR0*2.U8
11300c1bc742181ded4930842b46e9507372f0b1b963James DongdYf1U8          DN YTR1*2.U8
11310c1bc742181ded4930842b46e9507372f0b1b963James DongdYf2U8          DN YTR2*2.U8
11320c1bc742181ded4930842b46e9507372f0b1b963James DongdYf3U8          DN YTR3*2.U8
11330c1bc742181ded4930842b46e9507372f0b1b963James DongdYf4U8          DN YTR4*2.U8
11340c1bc742181ded4930842b46e9507372f0b1b963James DongdYf5U8          DN YTR5*2.U8
11350c1bc742181ded4930842b46e9507372f0b1b963James DongdYf6U8          DN YTR6*2.U8
11360c1bc742181ded4930842b46e9507372f0b1b963James DongdYf7U8          DN YTR7*2.U8
11370c1bc742181ded4930842b46e9507372f0b1b963James Dong
11380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
11390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Do saturation if outsize is other than S16
11400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
11410c1bc742181ded4930842b46e9507372f0b1b963James Dong
11420c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ("$outsize"="u8")
11430c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Output range [0-255]
11440c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf0U8, qYf0
11450c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf1U8, qYf1
11460c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf2U8, qYf2
11470c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf3U8, qYf3
11480c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf4U8, qYf4
11490c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf5U8, qYf5
11500c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf6U8, qYf6
11510c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQMOVN            dYf7U8, qYf7
11520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
11530c1bc742181ded4930842b46e9507372f0b1b963James Dong
11540c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF ("$outsize"="s9")
11550c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Output range [-256 to +255]
11560c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf0, qYf0, #16-9
11570c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf1, qYf1, #16-9
11580c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf2, qYf2, #16-9
11590c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf3, qYf3, #16-9
11600c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf4, qYf4, #16-9
11610c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf5, qYf5, #16-9
11620c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf6, qYf6, #16-9
11630c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQSHL            qYf7, qYf7, #16-9
11640c1bc742181ded4930842b46e9507372f0b1b963James Dong
11650c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf0, qYf0, #16-9
11660c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf1, qYf1, #16-9
11670c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf2, qYf2, #16-9
11680c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf3, qYf3, #16-9
11690c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf4, qYf4, #16-9
11700c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf5, qYf5, #16-9
11710c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf6, qYf6, #16-9
11720c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHR             qYf7, qYf7, #16-9
11730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
11740c1bc742181ded4930842b46e9507372f0b1b963James Dong
11750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Store output depending on the Stride size
11760c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$stride"="s"
11770c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf0, [pDest @64], Stride
11780c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf1, [pDest @64], Stride
11790c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf2, [pDest @64], Stride
11800c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf3, [pDest @64], Stride
11810c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf4, [pDest @64], Stride
11820c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf5, [pDest @64], Stride
11830c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf6, [pDest @64], Stride
11840c1bc742181ded4930842b46e9507372f0b1b963James Dong            VST1        qYf7, [pDest @64]
11850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ELSE
11860c1bc742181ded4930842b46e9507372f0b1b963James Dong            IF ("$outsize"="u8")
11870c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf0U8, [pDest @64], #8
11880c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf1U8, [pDest @64], #8
11890c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf2U8, [pDest @64], #8
11900c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf3U8, [pDest @64], #8
11910c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf4U8, [pDest @64], #8
11920c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf5U8, [pDest @64], #8
11930c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf6U8, [pDest @64], #8
11940c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        dYf7U8, [pDest @64]
11950c1bc742181ded4930842b46e9507372f0b1b963James Dong            ELSE
11960c1bc742181ded4930842b46e9507372f0b1b963James Dong                ;// ("$outsize"="s9") or ("$outsize"="s16")
11970c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf0, [pDest @64], #16
11980c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf1, [pDest @64], #16
11990c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf2, [pDest @64], #16
12000c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf3, [pDest @64], #16
12010c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf4, [pDest @64], #16
12020c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf5, [pDest @64], #16
12030c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf6, [pDest @64], #16
12040c1bc742181ded4930842b46e9507372f0b1b963James Dong                VST1        qYf7, [pDest @64]
12050c1bc742181ded4930842b46e9507372f0b1b963James Dong            ENDIF
12060c1bc742181ded4930842b46e9507372f0b1b963James Dong
12070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
12080c1bc742181ded4930842b46e9507372f0b1b963James Dong
12090c1bc742181ded4930842b46e9507372f0b1b963James Dong
12100c1bc742181ded4930842b46e9507372f0b1b963James Dong
12110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// CortexA8
12120c1bc742181ded4930842b46e9507372f0b1b963James Dong
12130c1bc742181ded4930842b46e9507372f0b1b963James Dong
12140c1bc742181ded4930842b46e9507372f0b1b963James Dong
12150c1bc742181ded4930842b46e9507372f0b1b963James Dong        MEND
12160c1bc742181ded4930842b46e9507372f0b1b963James Dong
12170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Scale TWO input rows with TWO rows of 16 bit scale values
12180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows of
12200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// input (eight values each) with two rows of scale values. It also
12210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// loads the next two rows of scale values from pScale when $LastRow is 0.
12220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
        ;// Each input is multiplied by its scale value with a widening multiply
        ;// (VMULL) and then narrowed back with a saturating, rounding right
        ;// shift by 12 (VQRSHRN), overwriting the input register.
        ;//
12230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Input Registers:
12240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dAlo           - Input D register with first four S16 values of row n
12260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dAhi           - Input D register with next four S16 values of row n
12270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dBlo           - Input D register with first four S16 values of row n+1
12280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dBhi           - Input D register with next four S16 values of row n+1
12290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pScale          - Pointer to next row of scale values
12300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qT0lo           - Temporary scratch register (product of $dAlo)
12310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qT0hi           - Temporary scratch register (product of $dAhi)
12320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qT1lo           - Temporary scratch register (product of $dBlo)
12330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qT1hi           - Temporary scratch register (product of $dBhi)
12340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dScale1lo       - Scale values for the first four values of row n
12350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dScale1hi       - Scale values for the next four values of row n
12360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dScale2lo       - Scale values for the first four values of row n+1
12370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dScale2hi       - Scale values for the next four values of row n+1
12380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Input Flag
12400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $LastRow        - 0: also load scale values for the next two rows;
        ;//                   any other value: skip the load (last row pair)
12420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output Registers:
12440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dAlo           - Scaled output values (first four S16 of row n)
12460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dAhi           - Scaled output values (next four S16 of row n)
12470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dBlo           - Scaled output values (first four S16 of row n+1)
12480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// $dBhi           - Scaled output values (next four S16 of row n+1)
12490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qScale1         - Scale values for row n+2 (only if $LastRow is 0)
12500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qScale2         - Scale values for row n+3 (only if $LastRow is 0)
12510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pScale          - Advanced by 32 bytes (only if $LastRow is 0)
12520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12530c1bc742181ded4930842b46e9507372f0b1b963James Dong        MACRO
12540c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow
12550c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qT0lo, $dAlo, dScale1lo     ;// Row n,   first four: widening multiply by scale
12560c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qT0hi, $dAhi, dScale1hi     ;// Row n,   next four
12570c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qT1lo, $dBlo, dScale2lo     ;// Row n+1, first four
12580c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL       qT1hi, $dBhi, dScale2hi     ;// Row n+1, next four
        ;// The loads below come after the multiplies have read the current scale
        ;// values. NOTE(review): presumably qScale1/qScale2 alias
        ;// dScale1lo:hi/dScale2lo:hi (aliases are defined outside this macro) - confirm.
12590c1bc742181ded4930842b46e9507372f0b1b963James Dong        IF "$LastRow"="0"
12600c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        qScale1, [pScale], #16  ;// Load scale for row n+2
12610c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        qScale2, [pScale], #16  ;// Load scale for row n+3
12620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF
12630c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRN       $dAlo, qT0lo, #12         ;// Saturating rounding narrow: (product + 2^11) >> 12
12640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRN       $dAhi, qT0hi, #12
12650c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRN       $dBlo, qT1lo, #12
12660c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRN       $dBhi, qT1hi, #12
12670c1bc742181ded4930842b46e9507372f0b1b963James Dong        MEND
12680c1bc742181ded4930842b46e9507372f0b1b963James Dong
12690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Scale 8x8 block input values with 16 bit scale values
12700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// This macro is used to pre-scale a block of 8x8 input values.
12720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// It also performs the first-stage transformations of the IDCT.
12730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Input Registers:
12750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dXjnlo          - n th input D register with first four S16 values
12770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dXjnhi          - n th input D register with next four S16 values
12780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qXjn            - n th input Q register with eight S16 values
12790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pScale          - Pointer to scale values
12800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output Registers:
12820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qXin            - n th output Q register with eight S16 output values of 1st stage
        ;// pScale          - Advanced by 128 bytes (eight 16-byte row loads)
        ;// pSrc            - Overwritten with the address of armCOMM_IDCTCoef
        ;// dCoefs          - Loaded from the start of armCOMM_IDCTCoef
        ;//
        ;// NOTE(review): no instruction here writes qXi0 or qXi1; presumably they
        ;// alias qXj0 and qXj4 (aliases are defined outside this macro) - confirm.
12840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
12850c1bc742181ded4930842b46e9507372f0b1b963James Dong        MACRO
12860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_PRESCALE16
12870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        qScale1, [pScale], #16      ;// Load Pre scale for row 0
12880c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1        qScale2, [pScale], #16      ;// Load Pre scale for row 1
12890c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0        ;// Pre scale row 0 & 1
12900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0        ;// Pre scale row 2 & 3
12910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0        ;// Pre scale row 4 & 5
12920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1        ;// Pre scale row 6 & 7 (last pair: no further scale load)
12930c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXi5, qXj1, qXj7            ;// (j1+j7)/2
12940c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXi6, qXj1, qXj7            ;// j1-j7
12950c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants (pSrc is overwritten)
12960c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXi3, qXj2, qXj6            ;// (j2+j6)/2
12970c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXi2, qXj2, qXj6            ;// j2-j6
12980c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLDR        dCoefs, [pSrc]              ;// Load DCT inverse AAN constants
12990c1bc742181ded4930842b46e9507372f0b1b963James Dong        VHADD       qXi7, qXj5, qXj3            ;// (j5+j3)/2
13000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB        qXi4, qXj5, qXj3            ;// j5-j3
13010c1bc742181ded4930842b46e9507372f0b1b963James Dong        MEND
13020c1bc742181ded4930842b46e9507372f0b1b963James Dong
13030c1bc742181ded4930842b46e9507372f0b1b963James Dong
13040c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Scale 8x8 block input values with 32 bit scale values
13050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
13060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// This macro is used to pre-scale a block of 8x8 input values.
13070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// It also performs the first-stage transformations of the IDCT.
13080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
        ;// Per lane, each input x is widened and shifted left by 11 (VSHLL), then
        ;// multiplied by its scale with VQRDMULH, which yields the rounded,
        ;// saturated value of (scale * x) >> 20. Results are narrowed back to
        ;// 16 bits with VMOVN (plain truncation, no saturation).
        ;//
13090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Input Registers:
13100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
13110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dXjnlo          - n th input D register with first four S16 values
13120c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dXjnhi          - n th input D register with next four S16 values
13130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// qXjn            - n th input Q register with eight S16 values
13140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// pScale          - Pointer to 32bit scale values in Q23 format
        ;// pSrc            - Used to reload qXj4 from [pSrc - 64]
13150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
13160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output Registers:
13170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
13180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dXinlo          - n th output D register with first four S16 output values of 1st stage
13190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// dXinhi          - n th output D register with next four S16 output values of 1st stage
        ;// pScale          - Advanced by 128 bytes (left pointing at scale row 4)
        ;// pTemp           - Clobbered (walks scale rows 7, 6, 5)
        ;// pSrc            - Overwritten with the address of armCOMM_IDCTCoef
        ;// dCoefs          - Loaded from the start of armCOMM_IDCTCoef
13200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
13210c1bc742181ded4930842b46e9507372f0b1b963James Dong        MACRO
13220c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_IDCT_PRESCALE32
        ;// Scale registers: only Q0-Q3 are used. Rows 0,5,6,7 share Q0:Q1 and
        ;// rows 1,2,3,4 share Q2:Q3, so each load below must come after the
        ;// previous user of the same pair has finished.
13230c1bc742181ded4930842b46e9507372f0b1b963James DongqScale0lo       QN 0.S32
13240c1bc742181ded4930842b46e9507372f0b1b963James DongqScale0hi       QN 1.S32
13250c1bc742181ded4930842b46e9507372f0b1b963James DongqScale1lo       QN 2.S32
13260c1bc742181ded4930842b46e9507372f0b1b963James DongqScale1hi       QN 3.S32
13270c1bc742181ded4930842b46e9507372f0b1b963James DongqScale2lo       QN qScale1lo
13280c1bc742181ded4930842b46e9507372f0b1b963James DongqScale2hi       QN qScale1hi
13290c1bc742181ded4930842b46e9507372f0b1b963James DongqScale3lo       QN qScale1lo
13300c1bc742181ded4930842b46e9507372f0b1b963James DongqScale3hi       QN qScale1hi
13310c1bc742181ded4930842b46e9507372f0b1b963James DongqScale4lo       QN qScale1lo
13320c1bc742181ded4930842b46e9507372f0b1b963James DongqScale4hi       QN qScale1hi
13330c1bc742181ded4930842b46e9507372f0b1b963James DongqScale5lo       QN qScale0lo
13340c1bc742181ded4930842b46e9507372f0b1b963James DongqScale5hi       QN qScale0hi
13350c1bc742181ded4930842b46e9507372f0b1b963James DongqScale6lo       QN qScale0lo
13360c1bc742181ded4930842b46e9507372f0b1b963James DongqScale6hi       QN qScale0hi
13370c1bc742181ded4930842b46e9507372f0b1b963James DongqScale7lo       QN qScale0lo
13380c1bc742181ded4930842b46e9507372f0b1b963James DongqScale7hi       QN qScale0hi
13390c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Widened source registers: rows 0,2,3,4,7 share the qSrc0 pair and
        ;// rows 1,5,6 share the qSrc1 pair.
13400c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc0lo         QN 4.S32
13410c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc0hi         QN 5.S32
13420c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc1lo         QN 6.S32
13430c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc1hi         QN Src4.S32                     ;// Src4 is a register number defined outside this macro
13440c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc2lo         QN qSrc0lo
13450c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc2hi         QN qSrc0hi
13460c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc3lo         QN qSrc0lo
13470c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc3hi         QN qSrc0hi
13480c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc4lo         QN qSrc0lo
13490c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc4hi         QN qSrc0hi
13500c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc5lo         QN qSrc1lo
13510c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc5hi         QN qSrc1hi
13520c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc6lo         QN qSrc1lo
13530c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc6hi         QN qSrc1hi
13540c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc7lo         QN qSrc0lo
13550c1bc742181ded4930842b46e9507372f0b1b963James DongqSrc7hi         QN qSrc0hi
13560c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Butterfly results reuse the Q0:Q1 scale pair once the scale values
        ;// held there have been consumed.
13570c1bc742181ded4930842b46e9507372f0b1b963James DongqRes17lo        QN qScale0lo
13580c1bc742181ded4930842b46e9507372f0b1b963James DongqRes17hi        QN qScale0hi
13590c1bc742181ded4930842b46e9507372f0b1b963James DongqRes26lo        QN qScale0lo
13600c1bc742181ded4930842b46e9507372f0b1b963James DongqRes26hi        QN qScale0hi
13610c1bc742181ded4930842b46e9507372f0b1b963James DongqRes53lo        QN qScale0lo
13620c1bc742181ded4930842b46e9507372f0b1b963James DongqRes53hi        QN qScale0hi
13630c1bc742181ded4930842b46e9507372f0b1b963James Dong
13640c1bc742181ded4930842b46e9507372f0b1b963James Dong            ADD         pTemp, pScale, #4*8*7           ;// Address of pScale[7] (row 7: 7 rows * 8 values * 4 bytes)
13650c1bc742181ded4930842b46e9507372f0b1b963James Dong
13660c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Row 0
13670c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale0lo, qScale0hi}, [pScale]!   ;// Scale row 0; pScale += 32
13680c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc0lo, dXj0lo, #(12-1)        ;// Widen row 0 and shift left by 11
13690c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc0hi, dXj0hi, #(12-1)
13700c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale1lo, qScale1hi}, [pScale]!   ;// Scale row 1; pScale += 32
13710c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc0lo, qScale0lo, qSrc0lo     ;// Scaled row 0
13720c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc0hi, qScale0hi, qSrc0hi
13730c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale7lo, qScale7hi}, [pTemp]!    ;// Scale row 7 (overwrites row-0 scale, no longer needed); pTemp += 32
13740c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc1lo, dXj1lo, #(12-1)        ;// Widen row 1
13750c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc1hi, dXj1hi, #(12-1)
13760c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi0lo, qSrc0lo                 ;// Output i0
13770c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi0hi, qSrc0hi
13780c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc7lo, dXj7lo, #(12-1)        ;// Widen row 7 (reuses qSrc0 registers; row 0 already written to dXi0)
13790c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc7hi, dXj7hi, #(12-1)
13800c1bc742181ded4930842b46e9507372f0b1b963James Dong            SUB         pTemp, pTemp, #((16*2)+(4*8*1))     ;// Undo the 32-byte writeback and step back one row: pTemp -> pScale[6]
13810c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc1lo, qScale1lo, qSrc1lo     ;// Scaled row 1
13820c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc1hi, qScale1hi, qSrc1hi
13830c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc7lo, qScale7lo, qSrc7lo     ;// Scaled row 7
13840c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc7hi, qScale7hi, qSrc7hi
13850c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale2lo, qScale2hi}, [pScale]!   ;// Scale row 2 (overwrites row-1 scale); pScale += 32
13860c1bc742181ded4930842b46e9507372f0b1b963James Dong
13870c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Row 1 & 7
13880c1bc742181ded4930842b46e9507372f0b1b963James Dong            VHADD       qRes17lo, qSrc1lo, qSrc7lo      ;// (j1+j7)/2
13890c1bc742181ded4930842b46e9507372f0b1b963James Dong            VHADD       qRes17hi, qSrc1hi, qSrc7hi      ;// (j1+j7)/2
13900c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi5lo, qRes17lo                ;// Output i5
13910c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi5hi, qRes17hi
13920c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSUB        qRes17lo, qSrc1lo, qSrc7lo      ;// j1-j7
13930c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSUB        qRes17hi, qSrc1hi, qSrc7hi      ;// j1-j7
13940c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi6lo, qRes17lo                ;// Output i6
13950c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi6hi, qRes17hi
13960c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc2lo, dXj2lo, #(12-1)        ;// Widen row 2 (reuses qSrc0 registers)
13970c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc2hi, dXj2hi, #(12-1)
13980c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale6lo, qScale6hi}, [pTemp]!    ;// Scale row 6 (overwrites qRes17, already narrowed); pTemp += 32
13990c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc6lo, dXj6lo, #(12-1)        ;// Widen row 6 (reuses qSrc1 registers)
14000c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc6hi, dXj6hi, #(12-1)
14010c1bc742181ded4930842b46e9507372f0b1b963James Dong            SUB         pTemp, pTemp, #((16*2)+(4*8*1))     ;// Undo the writeback and step back one row: pTemp -> pScale[5]
14020c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc2lo, qScale2lo, qSrc2lo     ;// Scaled row 2
14030c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc2hi, qScale2hi, qSrc2hi
14040c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc6lo, qScale6lo, qSrc6lo     ;// Scaled row 6
14050c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc6hi, qScale6hi, qSrc6hi
14060c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale3lo, qScale3hi}, [pScale]!   ;// Scale row 3 (overwrites row-2 scale); pScale += 32
14070c1bc742181ded4930842b46e9507372f0b1b963James Dong
14080c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Row 2 & 6
14090c1bc742181ded4930842b46e9507372f0b1b963James Dong            VHADD       qRes26lo, qSrc2lo, qSrc6lo      ;// (j2+j6)/2
14100c1bc742181ded4930842b46e9507372f0b1b963James Dong            VHADD       qRes26hi, qSrc2hi, qSrc6hi      ;// (j2+j6)/2
14110c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi3lo, qRes26lo                ;// Output i3
14120c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi3hi, qRes26hi
14130c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSUB        qRes26lo, qSrc2lo, qSrc6lo      ;// j2-j6
14140c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSUB        qRes26hi, qSrc2hi, qSrc6hi      ;// j2-j6
14150c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi2lo, qRes26lo                ;// Output i2
14160c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi2hi, qRes26hi
14170c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc3lo, dXj3lo, #(12-1)        ;// Widen row 3 (reuses qSrc0 registers)
14180c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc3hi, dXj3hi, #(12-1)
14190c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale5lo, qScale5hi}, [pTemp]!    ;// Scale row 5 (overwrites qRes26, already narrowed)
14200c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc5lo, dXj5lo, #(12-1)        ;// Widen row 5 (reuses qSrc1 registers)
14210c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc5hi, dXj5hi, #(12-1)
14220c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc3lo, qScale3lo, qSrc3lo     ;// Scaled row 3
14230c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc3hi, qScale3hi, qSrc3hi
14240c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc5lo, qScale5lo, qSrc5lo     ;// Scaled row 5
14250c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc5hi, qScale5hi, qSrc5hi
14260c1bc742181ded4930842b46e9507372f0b1b963James Dong
14270c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Row 3 & 5
14280c1bc742181ded4930842b46e9507372f0b1b963James Dong            VHADD       qRes53lo, qSrc5lo, qSrc3lo      ;// (j5+j3)/2
14290c1bc742181ded4930842b46e9507372f0b1b963James Dong            VHADD       qRes53hi, qSrc5hi, qSrc3hi      ;// (j5+j3)/2
14300c1bc742181ded4930842b46e9507372f0b1b963James Dong            SUB         pSrc, pSrc, #16*2*2             ;// pSrc -= 64 bytes. NOTE(review): presumably this is row 4 of the S16 input block - confirm against caller
14310c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi7lo, qRes53lo                ;// Output i7
14320c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi7hi, qRes53hi
14330c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSUB        qRes53lo, qSrc5lo, qSrc3lo      ;// j5-j3
14340c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSUB        qRes53hi, qSrc5hi, qSrc3hi      ;// j5-j3
14350c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        qXj4, [pSrc @64]                ;// Reload qXj4 from memory (presumably its register was reused as scratch above - confirm)
14360c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi4lo, qRes53lo                ;// Output i4
14370c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi4hi, qRes53hi
14380c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc4lo, dXj4lo, #(12-1)        ;// Widen row 4 (reuses qSrc0 registers)
14390c1bc742181ded4930842b46e9507372f0b1b963James Dong            VSHLL       qSrc4hi, dXj4hi, #(12-1)
14400c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLD1        {qScale4lo, qScale4hi}, [pScale]    ;// Scale row 4 (no writeback: pScale stays at row 4)
14410c1bc742181ded4930842b46e9507372f0b1b963James Dong            LDR         pSrc, =armCOMM_IDCTCoef     ;// Address of DCT inverse AAN constants (pSrc is overwritten)
14420c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc4lo, qScale4lo, qSrc4lo     ;// Scaled row 4
14430c1bc742181ded4930842b46e9507372f0b1b963James Dong            VQRDMULH    qSrc4hi, qScale4hi, qSrc4hi
14440c1bc742181ded4930842b46e9507372f0b1b963James Dong            VLDR        dCoefs, [pSrc]                  ;// Load DCT inverse AAN constants
14450c1bc742181ded4930842b46e9507372f0b1b963James Dong            ;// Row 4
14460c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi1lo, qSrc4lo                 ;// Output i1
14470c1bc742181ded4930842b46e9507372f0b1b963James Dong            VMOVN       dXi1hi, qSrc4hi
14480c1bc742181ded4930842b46e9507372f0b1b963James Dong
14490c1bc742181ded4930842b46e9507372f0b1b963James Dong        MEND
14500c1bc742181ded4930842b46e9507372f0b1b963James Dong
14510c1bc742181ded4930842b46e9507372f0b1b963James Dong        END
1452