omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S revision 22e06318d04074d1a7c90caa173857abdcfd153e
1/*
2 * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
3 *
4 */
5
/*
 * Build attributes 24 and 25 are Tag_ABI_align_needed and
 * Tag_ABI_align_preserved in the ARM EABI: value 1 records that this
 * object relies on, and itself preserves, 8-byte stack alignment.
 */
6    .eabi_attribute 24, 1
7    .eabi_attribute 25, 1
8
/*
 * ARM (not Thumb) encoding is required by the code below: the literal
 * words at the end of the file are biased by 8, which is what reading
 * pc yields in ARM state.  NEON instructions are enabled for the
 * assembler, and everything is emitted into .text.
 */
9    .arm
10    .fpu neon
11    .text
12
/*
 * omxVCM4P10_DequantTransformResidualFromPairAndAdd  (ARM state, NEON)
 *
 * Produces a 4x4 block of signed 16-bit residuals, adds it to a 4x4
 * block of 8-bit samples read through r1, saturates the sums to
 * unsigned 8 bits and stores the four 32-bit rows through r3.
 *
 * Register arguments, as they are used below:
 *   r0  passed unchanged to armVCM4P10_UnpackBlock4x4
 *   r1  base of the four 32-bit rows that are added (kept in r7)
 *   r2  pointer to a signed halfword (kept in r8); tested against 0
 *       on the full path, dereferenced unconditionally on the
 *       DC-only path
 *   r3  base of the four 32-bit rows that are written (kept in r9)
 *
 * Stack arguments.  After the prologue the caller's stack words start
 * at sp+0x58 (40 bytes PUSH + 16 bytes VPUSH + 0x20 bytes of locals):
 *   [sp,#0x58]  byte step applied to r7 between row loads
 *   [sp,#0x5c]  byte step applied to r9 between row stores
 *   [sp,#0x60]  index into armVCM4P10_QPModuloTable and
 *               armVCM4P10_QPDivTable
 *   [sp,#0x64]  flag; zero selects the DC-only path at L0x114
 *
 * NOTE(review): these presumably map to ppSrc, pPred, pDC, pDst,
 * predStep, dstStep, QP and AC of the OpenMAX DL prototype - confirm
 * against the public header.
 *
 * Returns r0 = 0 on every path.  No argument validation is performed.
 *
 * NOTE(review): the symbol is only declared with .global/.func; there
 * is no ".type ..., %function" or ".size".  Confirm that Thumb callers
 * still get correct interworking at link time.
 */
13    .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
14    .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
15omxVCM4P10_DequantTransformResidualFromPairAndAdd:
/* Prologue: d8/d9 are saved because d8, d9 and q4 are written below. */
16    PUSH     {r4-r12,lr}
17    VPUSH    {d8-d9}
/* 0x20 bytes of scratch at sp; r4 keeps its address. */
18    SUB      sp,sp,#0x20
19    ADD      r4,sp,#0
/* r5 = flag word; r1-r3 are moved to r7-r9 before r1 is reused. */
20    LDR      r5,[sp,#0x64]
21    MOV      r7,r1
22    MOV      r8,r2
23    MOV      r9,r3
/* Flag == 0: skip unpack/dequant/transform and use the DC-only path. */
24    CMP      r5,#0
25    BEQ      L0x114
/*
 * Call the unpack helper with r0 untouched and r1 = scratch buffer.
 * Presumably it fills the 32-byte buffer with 16 halfword coefficients
 * (the buffer is read back as d0-d3 below) - confirm against the helper.
 */
26    MOV      r1,r4
27    BL       armVCM4P10_UnpackBlock4x4  ;//
/* r1 = table index taken from the stack argument at 0x60. */
28    LDR      r1,[sp,#0x60]
/*
 * Position-independent table addresses: each literal word holds
 * (table - (Pn+8)); adding pc at Pn, which reads as Pn+8 in ARM state,
 * gives the absolute address.  Pn must stay on the ADD it labels.
 */
29    LDR      r11, .LarmVCM4P10_QPModuloTable
30P0: ADD      r11, pc
31    LDR      r10, .LarmVCM4P10_QPDivTable
32P1: ADD      r10, pc
33    LDR      r2, .LarmVCM4P10_VMatrixU16
34P2: ADD      r2, pc
/* r12 = signed byte QPModuloTable[index], lr = signed byte QPDivTable[index]. */
35    LDRSB    r12,[r11,r1]
36    LDRSB    lr,[r10,r1]
/* VTBL byte-index patterns (r10 and r1 are free to be reused here). */
37    LDR      r10, =0x3020504
38    LDR      r1, =0x5040100
/* r12 is used as a byte offset into armVCM4P10_VMatrixU16. */
39    ADD      r2,r2,r12
/* d7 = byte indices {0,1,4,5,0,1,4,5}; d9 = {4,5,2,3,4,5,2,3}. */
40    VDUP.32  d7,r1
41    VDUP.32  d9,r10
/* d5 = QPDivTable entry in every 16-bit lane (per-lane shift count). */
42    VDUP.16  d5,lr
/* Load 8 bytes of multipliers starting at the selected offset. */
43    VLD1.8   {d6},[r2]
/* d8 = halfwords {0,2,0,2} of d6; d4 = halfwords {2,1,2,1} of d6. */
44    VTBL.8   d8,{d6},d7
45    VTBL.8   d4,{d6},d9
/*
 * Test the saved r2 pointer.  The flags set here are consumed by BEQ
 * and again by VMOVNE further down; no instruction in between writes
 * the flags.
 */
46    CMP      r8,#0
/* d0..d3 = the four 4x16-bit rows from the scratch buffer. */
47    VLD1.16  {d0,d1,d2,d3},[r4]
/* Scale both multiplier rows by the per-lane shift in d5. */
48    VSHL.U16 d8,d8,d5
49    VSHL.U16 d4,d4,d5
/* Only dereference r8 when it is non-zero. */
50    BEQ      L1
51    LDRSH    r10,[r8,#0]
52L1:
/* Multiply rows 0 and 2 by d8, rows 1 and 3 by d4. */
53    VMUL.I16 d0,d0,d8
54    VMUL.I16 d1,d1,d4
55    VMUL.I16 d2,d2,d8
56    VMUL.I16 d3,d3,d4
/* If r8 was non-zero, element [0][0] is replaced by the halfword at [r8]. */
57    VMOVNE.16 d0[0],r10
/* Transpose the 4x4 block of 16-bit elements held in d0..d3. */
58    VTRN.16  d0,d1
59    VTRN.16  d2,d3
60    VTRN.32  q0,q1
/* d4 = 0, so each VHADD.S16 x,d4 below yields x >> 1 (arithmetic). */
61    VMOV.I16 d4,#0
/*
 * First butterfly pass:
 *   d5 = d0 + d2          d6 = d0 - d2
 *   d7 = (d1 >> 1) - d3   d8 = d1 + (d3 >> 1)
 *   d0 = d5 + d8, d1 = d6 + d7, d2 = d6 - d7, d3 = d5 - d8
 */
62    VADD.I16 d5,d0,d2
63    VSUB.I16 d6,d0,d2
64    VHADD.S16 d7,d1,d4
65    VHADD.S16 d8,d3,d4
66    VSUB.I16 d7,d7,d3
67    VADD.I16 d8,d1,d8
68    VADD.I16 d0,d5,d8
69    VADD.I16 d1,d6,d7
70    VSUB.I16 d2,d6,d7
71    VSUB.I16 d3,d5,d8
/* Transpose again so the second pass runs along the other dimension. */
72    VTRN.16  d0,d1
73    VTRN.16  d2,d3
74    VTRN.32  q0,q1
/* Second butterfly pass, same arithmetic as the first. */
75    VADD.I16 d5,d0,d2
76    VSUB.I16 d6,d0,d2
77    VHADD.S16 d7,d1,d4
78    VHADD.S16 d8,d3,d4
79    VSUB.I16 d7,d7,d3
80    VADD.I16 d8,d1,d8
81    VADD.I16 d0,d5,d8
82    VADD.I16 d1,d6,d7
83    VSUB.I16 d2,d6,d7
84    VSUB.I16 d3,d5,d8
/* Rounding arithmetic shift right by 6 on all 16 results. */
85    VRSHR.S16 d0,d0,#6
86    VRSHR.S16 d1,d1,#6
87    VRSHR.S16 d2,d2,#6
88    VRSHR.S16 d3,d3,#6
89    B        L0x130
/*
 * DC-only path: every residual equals (halfword at [r8] + 0x20) >> 6.
 * NOTE(review): r8 is dereferenced without a zero check here, unlike
 * the path above - confirm callers always pass a valid pointer when
 * the flag is zero.
 */
90L0x114:
91    LDRSH    r10,[r8,#0]
92    ADD      r10,r10,#0x20
93    ASR      r10,r10,#6
94    VDUP.16  d0,r10
95    VDUP.16  d1,r10
96    VDUP.16  d2,r10
97    VDUP.16  d3,r10
/* Common tail: d0..d3 hold the 4x4 residual rows. */
98L0x130:
/* r1 = load step, r10 = store step (both in bytes). */
99    LDR      r1,[sp,#0x58]
100    LDR      r10,[sp,#0x5c]
/*
 * Read four 32-bit rows through r7 using word loads, post-incrementing
 * by r1 after each of the first three, and pack them as d4 = rows 0-1,
 * d5 = rows 2-3.
 */
101    LDR      r3,[r7],r1
102    LDR      r5,[r7],r1
103    VMOV     d4,r3,r5
104    LDR      r3,[r7],r1
105    LDR      r5,[r7,#0]
106    VMOV     d5,r3,r5
/* Widening add: 16-bit residual + zero-extended 8-bit sample. */
107    VADDW.U8 q3,q0,d4
108    VADDW.U8 q4,q1,d5
/* Saturating narrow of the signed 16-bit sums to unsigned 8 bits. */
109    VQMOVUN.S16 d0,q3
110    VQMOVUN.S16 d1,q4
/* Store four 32-bit rows through r9, stepping by r10 between rows. */
111    VST1.32  {d0[0]},[r9],r10
112    VST1.32  {d0[1]},[r9],r10
113    VST1.32  {d1[0]},[r9],r10
114    VST1.32  {d1[1]},[r9]
/* Return 0, release the scratch area and restore saved registers. */
115    MOV      r0,#0
116    ADD      sp,sp,#0x20
117    VPOP     {d8-d9}
118    POP      {r4-r12,pc}
119    .endfunc
120
120
/*
 * PC-relative literals for the three external lookup tables.
 * Each word is the distance from (Pn + 8) to the table, where Pn labels
 * the "ADD rX, pc" that consumes it; in ARM state pc reads as the
 * instruction address plus 8, so the sum is the table's absolute
 * address without needing an absolute relocation.  These words must be
 * kept in step with the P0/P1/P2 labels in the function above.
 */
121.LarmVCM4P10_QPModuloTable:
122    .word   armVCM4P10_QPModuloTable-(P0+8)
123.LarmVCM4P10_QPDivTable:
124    .word   armVCM4P10_QPDivTable-(P1+8)
125.LarmVCM4P10_VMatrixU16:
126    .word   armVCM4P10_VMatrixU16-(P2+8)
127
/* End of the assembly source. */
128    .end
129
130