1;//
2;//
3;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   9641
6;// Date:       Thursday, February 7, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13        INCLUDE omxtypes_s.h
14        INCLUDE armCOMM_s.h
15
16        M_VARIANTS ARM1136JS                            ;// Declare the CPU variant this file targets; the code below is wrapped in IF ARM1136JS
        ;// NOTE(review): M_VARIANTS is a macro defined outside this file (presumably in
        ;// armCOMM_s.h); it is assumed to define the ARM1136JS assembly-time flag tested
        ;// by the IF below - confirm against the macro definition.
17
18        EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe    ;// Make the routine's symbol visible to other object files
19
        ;// Assign FALSE to the assembly-time logical variable DEBUG_ON. It is not
        ;// referenced again in this file; presumably it is read by macros from the
        ;// included headers - TODO confirm.
20DEBUG_ON    SETL {FALSE}
21
22
23    IF ARM1136JS
24
25;// Function:
26;//     armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
27;//
28;// Implements horizontal half-sample interpolation for a block of size 4x4, using the
29;// 6-tap filter (1, -5, 20, 20, -5, 1) with rounding. Input and output should be aligned.
30;//
31;// Registers used as input for this function
32;// r0 = pSrc, r1 = srcStep, r2 = pDst, r3 = dstStep (r0, r2 are the source and destination pointers; r1, r3 the corresponding step sizes)
33;//
34;// Registers preserved for top level function
35;// r0,r1,r2,r3,r4,r5,r6,r14
36;//
37;// Registers modified by the function
38;// r7,r8,r9,r10,r11,r12
39;//
40;// Output registers
41;// None. Function will preserve r0-r3
42
43
44;// Declare input registers
;// These four hold the entry arguments. r1 and r3 also carry scratch aliases further
;// down (ValI/Temp2 and Temp1/r0x0fe00fe0), so the function body saves both step
;// values to locals before entering the loop and reloads them before they are needed.
45pSrc            RN 0
46srcStep         RN 1
47pDst            RN 2
48dstStep         RN 3
49
50;// Declare inner loop registers
;// Several names below map to the same physical register. That is intentional: each
;// name is only live for part of a loop pass, so the order of instructions in the
;// function body matters and must be kept when editing.
;//
;// Accumulators for the four filter outputs (two rows per register, one per halfword).
51Acc0            RN 4                                    ;// same register as ValA
52Acc1            RN 5                                    ;// same register as ValB and Temp5
53Acc2            RN 6                                    ;// same register as ValC
54Acc3            RN 7                                    ;// same register as ValD and Temp4
55
;// Source samples a..i of the two rows being filtered.
56ValA            RN 4                                    ;// row 0 word [d0 c0 b0 a0], later lanes a + 255
57ValB            RN 5                                    ;// packed [b1 a1 b0 a0]
58ValC            RN 6                                    ;// lanes c + 255
59ValD            RN 7                                    ;// row 1 word [d1 c1 b1 a1], later packed [d1 c1 d0 c0]
60ValE            RN 8                                    ;// row 0 word [h0 g0 f0 e0], later lanes e + 255
61ValF            RN 9                                    ;// packed [f1 e1 f0 e0]
62ValG            RN 12                                   ;// lanes g + 255; same register as Temp3
63ValH            RN 14                                   ;// row 1 word [h1 g1 f1 e1], later packed [h1 g1 h0 g0]; r14 is the link register
64ValI            RN 1                                    ;// [00 i1 00 i0], later lanes i + 255; same register as srcStep and Temp2
65
;// Scratch registers and constants.
66Temp1           RN 3                                    ;// same register as dstStep and r0x0fe00fe0
67Temp2           RN 1                                    ;// same register as srcStep and ValI
68Temp3           RN 12                                   ;// same register as ValG
69Temp4           RN 7                                    ;// not referenced anywhere in this file
70Temp5           RN 5                                    ;// same register as Acc1 and ValB
71r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]; shares r3 with Temp1, so it is loaded again after Temp1 is written
72r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
73Counter         RN 11                                   ;// loop counter for the two-row passes
74
75Height          RN 3                                    ;// not referenced anywhere in this file
76
77        M_ALLOC4 pDstStep, 4                            ;// Local that holds dstStep while r3 is used as scratch
78        M_ALLOC4 pSrcStep, 4                            ;// Local that holds srcStep while r1 is used as scratch
        ;// NOTE(review): M_ALLOC4, M_START, M_STR, M_LDR and M_END are macros defined
        ;// outside this file (presumably in armCOMM_s.h). The comments below assume
        ;// M_ALLOC4 reserves a local of the given size, M_STR/M_LDR expand to plain
        ;// stores/loads that leave the flags alone, and M_START/M_END save and restore
        ;// r4-r6 and lr - confirm against the macro definitions.
79
80        ;// Function header
        ;//
        ;// Filters a 4x4 block horizontally, two rows per loop pass. The two rows are
        ;// interleaved so that each 32-bit register carries one 16-bit lane per row
        ;// (lower halfword = row 0, upper halfword = row 1); two passes give four rows.
        ;//
        ;// For each row, nine source bytes a..i are read starting at pSrc and four
        ;// output bytes are produced:
        ;//     Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        ;//     Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
        ;//     Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
        ;//     Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        ;// each stored as clamp((Acc + 16) >> 5, 0, 255).
        ;//
        ;// Bias scheme: a, c, e, g and i are unpacked with +255 added to each lane.
        ;// Every accumulator is built as (x + 255) + y + 5*(4*((p + 255) + q) - ((r + 255) + s)),
        ;// so it carries a bias of 255 + 5*(4*255 - 255) = 16*255 per lane. Subtracting
        ;// 0x0fe0 = 16*255 - 16 removes the bias and leaves the +16 rounding term.
81        M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
82
83        MOV     Counter, #2                             ;// Two passes of two rows each = 4 rows
84        M_STR   dstStep, pDstStep                       ;// Save dstStep: r3 is overwritten as Temp1 / r0x0fe00fe0 in the loop
85        M_STR   srcStep, pSrcStep                       ;// Save srcStep: r1 is overwritten as Temp2 / ValI in the loop
86        LDR     r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results
87
88NextTwoRowsLoop
89        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
90        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0], then pSrc += 4
91        LDR     ValH, [pSrc, srcStep]                   ;// Load row 1 [h1 g1 f1 e1]
92        LDR     ValE, [pSrc], #4                        ;// Load row 0 [h0 g0 f0 e0], then pSrc += 4
93        LDRB    Temp2, [pSrc, srcStep]                  ;// Load the single byte i1 of row 1 (this overwrites srcStep in r1)
94        LDRB    Temp1, [pSrc], #-8                      ;// Load the single byte i0 of row 0 (overwrites dstStep in r3), then pSrc -= 8 to undo the two +4 steps
95
        ;// Interleave the two rows: packed registers keep two adjacent bytes per lane,
        ;// unpacked registers keep one byte per lane with the +255 offset added.
96        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
97        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
98        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
99        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
100        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
101        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
102        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
103        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
104
105        ;// Calculate Acc0
106        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
        ;// "Packed, ROR #8" operands select the odd bytes (b, d, f, h) of a packed register.
107        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// Temp1 = (c + 255) + d
108        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// Temp3 = (e + 255) + b
109        RSB     Temp1, Temp3, Temp1, LSL #2             ;// Temp1 = 4*Temp1 - Temp3
110        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// Acc0 = (a + 255) + f; last read of ValA (same register as Acc0)
111        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 = 5*Temp1 = 20*(c + d) - 5*(b + e) + 15*255
112        ADD     Acc0, Acc0, Temp1                       ;// Acc0 = filter sum + 16*255
113
114        ;// Calculate Acc1
115        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
116        UXTAB16 Temp1, ValE, ValD, ROR #8               ;// Temp1 = (e + 255) + d
117        UXTAB16 Temp3, ValC, ValF, ROR #8               ;// Temp3 = (c + 255) + f
118        RSB     Temp1, Temp3, Temp1, LSL #2             ;// Temp1 = 4*Temp1 - Temp3; Temp3 is dead after this, freeing r12 for ValG
119        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
120        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 = 5*Temp1 = 20*(d + e) - 5*(c + f) + 15*255
121        UXTAB16 Acc1, ValG, ValB, ROR #8                ;// Acc1 = (g + 255) + b; last read of ValB (same register as Acc1)
122        ADD     Acc1, Acc1, Temp1                       ;// Acc1 = filter sum + 16*255
123
124        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
125        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// Acc2 = (c + 255) + h; last read of ValC (same register as Acc2)
126        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
127        UQSUB16 Acc0, Acc0, r0x0fe00fe0                 ;// Remove the bias, keep +16 for rounding; negative results saturate to 0
128        UQSUB16 Acc1, Acc1, r0x0fe00fe0                 ;// Same for Acc1
129        USAT16  Acc0, #13, Acc0                         ;// Clamp each lane to 0..8191 so that >> 5 yields at most 255
130        USAT16  Acc1, #13, Acc1                         ;// Same for Acc1
131
132        ;// Calculate Acc2
133        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
134        UXTAB16 Temp1, ValG, ValD, ROR #8               ;// Temp1 = (g + 255) + d; overwrites the 0x0fe00fe0 constant in r3
135        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// Acc3 = (i + 255) + d; last read of ValD (same register as Acc3)
136        UXTAB16 Temp2, ValE, ValF, ROR #8               ;// Temp2 = (e + 255) + f; ValI in r1 is dead by now
137        AND     Acc1, r0x00ff00ff, Acc1, LSR #5         ;// Acc1 = Acc1 >> 5 as one byte per lane (output pixel 1)
138        AND     Acc0, r0x00ff00ff, Acc0, LSR #5         ;// Acc0 = Acc0 >> 5 as one byte per lane (output pixel 0)
139        ORR     Acc0, Acc0, Acc1, LSL #8                ;// Acc0 = [out1 out0] of row 1 in the top half, of row 0 in the bottom half
140        RSB     Temp5, Temp1, Temp2, LSL #2             ;// Temp5 = 4*Temp2 - Temp1; reuses r5 now that Acc1 is merged into Acc0
141        UXTAB16 Temp2, ValG, ValF, ROR #8               ;// Temp2 = (g + 255) + f, prepared early for Acc3
142        ADD     Temp5, Temp5, Temp5, LSL #2             ;// Temp5 = 5*Temp5 = 20*(e + f) - 5*(d + g) + 15*255
143        ADD     Acc2, Acc2, Temp5                       ;// Acc2 = filter sum + 16*255
144
145        ;// Calculate Acc3
146        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
147        UXTAB16 Temp5, ValE, ValH, ROR #8               ;// Temp5 = (e + 255) + h
148        RSB     Temp5, Temp5, Temp2, LSL #2             ;// Temp5 = 4*Temp2 - Temp5
149        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// Load the constant again: r3 was reused as Temp1 above
150        ADD     Temp5, Temp5, Temp5, LSL #2             ;// Temp5 = 5*Temp5 = 20*(f + g) - 5*(e + h) + 15*255
151        ADD     Acc3, Acc3, Temp5                       ;// Acc3 = filter sum + 16*255
152
153        UQSUB16 Acc3, Acc3, r0x0fe00fe0                 ;// Remove the bias, keep +16 for rounding; negative results saturate to 0
154        UQSUB16 Acc2, Acc2, r0x0fe00fe0                 ;// Same for Acc2; last use of the constant in r3
155        USAT16  Acc3, #13, Acc3                         ;// Clamp each lane to 0..8191 so that >> 5 yields at most 255
156        USAT16  Acc2, #13, Acc2                         ;// Same for Acc2
157
158        M_LDR   dstStep, pDstStep                       ;// Bring dstStep back into r3 for the stores below
159        AND     Acc3, r0x00ff00ff, Acc3, LSR #5         ;// Acc3 = Acc3 >> 5 as one byte per lane (output pixel 3)
160        AND     Acc2, r0x00ff00ff, Acc2, LSR #5         ;// Acc2 = Acc2 >> 5 as one byte per lane (output pixel 2)
161        ORR     Acc2, Acc2, Acc3, LSL #8                ;// Acc2 = [out3 out2] of row 1 in the top half, of row 0 in the bottom half
162
163        SUBS    Counter, Counter, #1                    ;// Count the pass; the flags are consumed by the BGT at the end of the loop
164        M_LDR   srcStep, pSrcStep                       ;// Bring srcStep back into r1
165        PKHBT   Acc1, Acc0, Acc2, LSL #16               ;// Row 0 result: bottom halves of Acc0 and Acc2 -> [out3 out2 out1 out0]
166        M_STR   Acc1, [pDst], dstStep                   ;// Store result1
167        PKHTB   Acc2, Acc2, Acc0, ASR #16               ;// Row 1 result: top halves of Acc0 and Acc2 -> [out3 out2 out1 out0]
168        M_STR   Acc2, [pDst], dstStep                   ;// Store result2
169        ADD     pSrc, pSrc, srcStep, LSL #1             ;// Advance the source pointer by two rows
170
171        BGT     NextTwoRowsLoop                         ;// Repeat while Counter > 0
172End
        ;// Undo the pointer updates made by the two passes (4 rows each) so that
        ;// pDst and pSrc leave with their entry values.
173        SUB     pDst, pDst, dstStep, LSL #2
174        SUB     pSrc, pSrc, srcStep, LSL #2
175
176        M_END
177
178    ENDIF
179
180    END
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240