1;//
2;//
3;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   9641
6;// Date:       Thursday, February 7, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13
14        INCLUDE omxtypes_s.h
15        INCLUDE armCOMM_s.h
16
17        M_VARIANTS CortexA8
18
19
20    IF CortexA8
21
22    M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero    ;// Branch targets used when dx + dy != 0 (chosen by the CMN/LDRNE pair in the function entry)
23    ;// The entry code loads pc from [pTable + iWidth * 2], i.e. 32-bit word index iWidth / 2 of this table
24    DCD   WidthIs2MVIsNotZero, WidthIs2MVIsNotZero    ;// words 0 and 1; word 1 is the one fetched for iWidth = 2
25    DCD   WidthIs4MVIsNotZero, WidthIs4MVIsNotZero    ;// words 2 and 3; word 2 is the one fetched for iWidth = 4
26    DCD   WidthIs8MVIsNotZero                         ;// word 4; fetched for iWidth = 8
27
28    M_TABLE armVCM4P10_WidthBranchTableMVIsZero       ;// Branch targets used when dx + dy == 0 (chosen by the CMN/LDREQ pair in the function entry)
29    ;// Indexed the same way: 32-bit word index iWidth / 2
30    DCD   WidthIs2MVIsZero, WidthIs2MVIsZero          ;// words 0 and 1; word 1 is the one fetched for iWidth = 2
31    DCD   WidthIs4MVIsZero, WidthIs4MVIsZero          ;// words 2 and 3; word 2 is the one fetched for iWidth = 4
32    DCD   WidthIs8MVIsZero                            ;// word 4; fetched for iWidth = 8
33
34
35;// input registers
36
37pSrc                 RN 0      ;// source address; advanced by the post-indexed VLD1 loads
38iSrcStep             RN 1      ;// amount added to pSrc to step from one source row to the next
39pDst                 RN 2      ;// destination address; advanced by the post-indexed VST1 stores
40iDstStep             RN 3      ;// amount added to pDst after each stored row
41iWidth               RN 4      ;// loaded from stack argument Width; selects the branch-table entry
42iHeight              RN 5      ;// loaded from stack argument Height; counted down by the row loops
43dx                   RN 6      ;// loaded from stack argument Dx
44dy                   RN 7      ;// loaded from stack argument Dy
45
46;// local variable registers
47pc                   RN 15     ;// program counter; written by the table-driven LDR that dispatches on iWidth
48return               RN 0      ;// same register as pSrc; receives OMX_Sts_NoErr just before each exit
49EightMinusdx         RN 8      ;// 8 - dx
50EightMinusdy         RN 9      ;// 8 - dy
51
52ACoeff               RN 12     ;// (8 - dx) * (8 - dy)
53BCoeff               RN 9      ;// dx * (8 - dy); shares r9 with EightMinusdy
54CCoeff               RN 8      ;// (8 - dx) * dy; shares r8 with EightMinusdx
55DCoeff               RN 6      ;// dx * dy; shares r6 with dx
56
57pTable               RN 11     ;// address of whichever width branch table was selected
58
59Step1                RN 10     ;// constant 1, used as a post-index increment
60SrcStepMinus1        RN 14     ;// iSrcStep - 1, used as a post-index increment
61
62dACoeff              DN D12.U8 ;// ACoeff duplicated into the U8 lanes by VDUP
63dBCoeff              DN D13.U8 ;// BCoeff duplicated into the U8 lanes by VDUP
64dCCoeff              DN D14.U8 ;// CCoeff duplicated into the U8 lanes by VDUP
65dDCoeff              DN D15.U8 ;// DCoeff duplicated into the U8 lanes by VDUP
66
67dRow0a               DN D0.U8  ;// 8 source bytes of the current top row
68dRow0b               DN D1.U8  ;// the same row, loaded from one byte further on
69dRow1a               DN D2.U8  ;// 8 source bytes of the following row
70dRow1b               DN D3.U8  ;// that row, loaded from one byte further on
71
72qRow0a               QN Q2.S16 ;// 16-bit accumulator for the first output row of a pass
73qRow0b               QN Q3.S16 ;// 16-bit accumulator for the second output row of a pass
74
75;//dIndex               DN    D16.U8
76qRow1a               QN Q11.S16 ;// accumulator for the third output row (only used by the width-8 loop)
77qRow1b               QN Q12.S16 ;// accumulator for the fourth output row (only used by the width-8 loop)
78
79dRow2a               DN D16.U8 ;// third source row of a width-8 pass
80dRow2b               DN D17.U8 ;// third source row, one byte further on
81dRow3a               DN D18.U8 ;// fourth source row of a width-8 pass
82dRow3b               DN D19.U8 ;// fourth source row, one byte further on
83
84qOutRow2             QN Q11.U16 ;// same register as qRow1a, typed U16 as the source of VQRSHRN
85qOutRow3             QN Q12.U16 ;// same register as qRow1b, typed U16 as the source of VQRSHRN
86dOutRow2             DN D20.U8 ;// narrowed third output row
87dOutRow3             DN D21.U8 ;// narrowed fourth output row
88dOutRow2U64          DN D20.U64 ;// D20 typed U64 for the whole-register store
89dOutRow3U64          DN D21.U64 ;// D21 typed U64 for the whole-register store
90
91qOutRow0             QN Q2.U16 ;// same register as qRow0a, typed U16 as the source of VQRSHRN
92qOutRow1             QN Q3.U16 ;// same register as qRow0b, typed U16 as the source of VQRSHRN
93dOutRow0             DN D8.U8  ;// narrowed first output row
94dOutRow1             DN D9.U8  ;// narrowed second output row
95
96dOutRow0U64          DN D8.U64 ;// D8 typed U64: whole-register store in the width-8 loop
97dOutRow1U64          DN D9.U64 ;// D9 typed U64: whole-register store in the width-8 loop
98
99dOutRow0U32          DN D8.U32 ;// D8 typed U32: lane [0] is stored by the width-4 loop
100dOutRow1U32          DN D9.U32 ;// D9 typed U32: lane [0] is stored by the width-4 loop
101
102dOutRow0U16          DN D8.U16 ;// D8 typed U16: lane [0] is stored by the width-2 loop
103dOutRow1U16          DN D9.U16 ;// D9 typed U16: lane [0] is stored by the width-2 loop
104
105                                 ;// The aliases below re-type the source-row registers D0..D3 for the copy-only (dx + dy == 0) loops
106dOut0U64             DN D0.U64 ;// D0 (dRow0a) stored whole by the width-8 copy loop
107dOut1U64             DN D1.U64 ;// D1 (dRow0b) stored whole by the width-8 copy loop
108
109dOut00U32            DN D0.U32 ;// D0: lane [0] stored by the width-4 copy loop
110dOut01U32            DN D1.U32 ;// D1: lane [0] stored by the width-4 copy loop
111dOut10U32            DN D2.U32 ;// NOTE(review): not referenced in the code visible here
112dOut11U32            DN D3.U32 ;// NOTE(review): not referenced in the code visible here
113
114dOut0U16             DN D0.U16 ;// D0: lane [0] stored by the width-2 copy loop
115dOut1U16             DN D1.U16 ;// D1: lane [0] stored by the width-2 copy loop
116
117;//-----------------------------------------------------------------------------------------------
118;// armVCM4P10_Interpolate_Chroma_asm starts
119;//-----------------------------------------------------------------------------------------------
120
121        ;// Write function header
122        M_START armVCM4P10_Interpolate_Chroma, r11, d15   ;// NOTE(review): M_START is not defined in this file (presumably armCOMM_s.h); r11 and d15 presumably name the highest registers it preserves - confirm
123        ;// Entry code: derive the four weights A..D from dx and dy, preload the first source row twice (offset 0 and +1), then jump to the loop matching iWidth
124        ;// Define stack arguments
125        M_ARG   Width,      4
126        M_ARG   Height,     4
127        M_ARG   Dx,         4
128        M_ARG   Dy,         4
129
130        ;// Load argument from the stack
131        ;// M_STALL ARM1136JS=4
132
133        M_LDRD   dx, dy, Dx                 ;// NOTE(review): macro defined elsewhere; presumably loads the Dx/Dy pair into dx, dy
134        M_LDRD   iWidth, iHeight, Width     ;// NOTE(review): presumably loads the Width/Height pair into iWidth, iHeight
135
136        ;// EightMinusdx = 8 - dx
137        ;// EightMinusdy = 8 - dy
138
139        ;// ACoeff = EightMinusdx * EightMinusdy
140        ;// BCoeff = dx * EightMinusdy
141        ;// CCoeff = EightMinusdx * dy
142        ;// DCoeff = dx * dy
143        ;// The weights reuse the registers of their own inputs (r9, r8, r6), so the order of the four multiplies below matters
144        RSB     EightMinusdx, dx, #8
145        RSB     EightMinusdy, dy, #8
146        CMN     dx,dy                       ;// flags from dx + dy: Z is set only when the sum is zero
147        MOV     Step1, #1
148        LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero       ;// dx + dy == 0: use the copy-only loops
149        SUB     SrcStepMinus1, iSrcStep, Step1                     ;// does not touch the flags set by CMN
150        LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero    ;// dx + dy != 0: use the weighted loops
151
152        VLD1    dRow0a, [pSrc], Step1                   ;// 0a: first row at offset 0; pSrc += 1
153
154        SMULBB  ACoeff, EightMinusdx, EightMinusdy      ;// must come first: the next multiplies overwrite r9 and r8
155        SMULBB  BCoeff, dx, EightMinusdy                ;// writes r9, replacing EightMinusdy
156        VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b: first row at offset +1; pSrc += iSrcStep - 1, leaving pSrc one iSrcStep past its entry value
157        SMULBB  CCoeff, EightMinusdx, dy                ;// writes r8, replacing EightMinusdx
158        SMULBB  DCoeff, dx, dy                          ;// writes r6, replacing dx
159
160        VDUP    dACoeff, ACoeff             ;// copy each weight into every U8 lane of its D register
161        VDUP    dBCoeff, BCoeff
162        VDUP    dCCoeff, CCoeff
163        VDUP    dDCoeff, DCoeff
164
165        LDR     pc, [pTable, iWidth, LSL #1]      ;// Branch to the case based on iWidth: byte offset iWidth * 2 selects table word iWidth / 2
166
167;// Pixel layout:
168;//
169;//   x00 x01 x02
170;//   x10 x11 x12
171;//   x20 x21 x22
172
173;// If the fractional MV is not (0, 0)
174WidthIs8MVIsNotZero                                  ;// Weighted loop, 8 bytes wide, four output rows per pass
175                ;// On entry to each pass D0/D1 hold row 0 (0a at offset 0, 0b at offset +1); outN = A*rowNa + B*rowNb + C*row(N+1)a + D*row(N+1)b
176                VLD1   dRow1a, [pSrc], Step1            ;// 1a
177                VMULL  qRow0a, dRow0a, dACoeff          ;// out0  = A * 0a
178                VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b
179                VMULL  qRow0b, dRow1a, dACoeff          ;// out1  = A * 1a
180                VLD1   dRow2a, [pSrc], Step1            ;// 2a
181                VMLAL  qRow0a, dRow0b, dBCoeff          ;// out0 += B * 0b
182                VLD1   dRow2b, [pSrc], SrcStepMinus1    ;// 2b
183                VMULL  qRow1a, dRow2a, dACoeff          ;// out2  = A * 2a
184                VMLAL  qRow0b, dRow1b, dBCoeff          ;// out1 += B * 1b
185                VLD1   dRow3a, [pSrc], Step1            ;// 3a
186                VMLAL  qRow0a, dRow1a, dCCoeff          ;// out0 += C * 1a
187                VMLAL  qRow1a, dRow2b, dBCoeff          ;// out2 += B * 2b
188                VMULL  qRow1b, dRow3a, dACoeff          ;// out3  = A * 3a
189                VLD1   dRow3b, [pSrc], SrcStepMinus1    ;// 3b
190                VMLAL  qRow0b, dRow2a, dCCoeff          ;// out1 += C * 2a
191                VLD1   dRow0a, [pSrc], Step1            ;// 0a: fifth row, which is also row 0 of the next pass
192                VMLAL  qRow1b, dRow3b, dBCoeff          ;// out3 += B * 3b
193                VMLAL  qRow1a, dRow3a, dCCoeff          ;// out2 += C * 3a
194                VMLAL  qRow0a, dRow1b, dDCoeff          ;// out0 += D * 1b
195                VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// 0b: fifth row at offset +1
196                VMLAL  qRow1b, dRow0a, dCCoeff          ;// out3 += C * (new) 0a
197                VMLAL  qRow0b, dRow2b, dDCoeff          ;// out1 += D * 2b
198                VMLAL  qRow1a, dRow3b, dDCoeff          ;// out2 += D * 3b
199
200
201                SUBS   iHeight, iHeight, #4             ;// four rows done; the flags are consumed by the BGT below
202                VMLAL  qRow1b, dRow0b, dDCoeff          ;// out3 += D * (new) 0b
203                ;// Narrow each 16-bit sum to 8 bits: rounding shift right by 6 with saturation
204                VQRSHRN dOutRow0, qOutRow0, #6
205                VQRSHRN dOutRow1, qOutRow1, #6
206                VQRSHRN dOutRow2, qOutRow2, #6
207                VST1   dOutRow0U64, [pDst], iDstStep    ;// store 8 bytes of out0; pDst += iDstStep
208                VQRSHRN dOutRow3, qOutRow3, #6
209
210                VST1   dOutRow1U64, [pDst], iDstStep
211                VST1   dOutRow2U64, [pDst], iDstStep
212                VST1   dOutRow3U64, [pDst], iDstStep
213
214
215                BGT     WidthIs8MVIsNotZero             ;// repeat while iHeight is still greater than zero
216                MOV     return,  #OMX_Sts_NoErr
217                M_EXIT                                  ;// NOTE(review): macro defined outside this file; presumably restores the saved registers and returns
218
219WidthIs4MVIsNotZero                                  ;// Weighted loop, two output rows per pass; all 8 lanes are computed but only 4 bytes per row are stored
220                ;// On entry to each pass D0/D1 hold row 0 (offset 0 and offset +1)
221                VLD1   dRow1a, [pSrc], Step1            ;// row 1 at offset 0
222                VMULL  qRow0a, dRow0a, dACoeff          ;// out0  = A * 0a
223                VMULL  qRow0b, dRow1a, dACoeff          ;// out1  = A * 1a
224                VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// row 1 at offset +1
225                VMLAL  qRow0a, dRow0b, dBCoeff          ;// out0 += B * 0b
226                VMLAL  qRow0b, dRow1b, dBCoeff          ;// out1 += B * 1b
227                VLD1   dRow0a, [pSrc], Step1            ;// row 2 at offset 0; doubles as row 0 of the next pass
228                VMLAL  qRow0a, dRow1a, dCCoeff          ;// out0 += C * 1a
229                VMLAL  qRow0b, dRow0a, dCCoeff          ;// out1 += C * row 2 (offset 0)
230                VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// row 2 at offset +1
231                SUBS   iHeight, iHeight, #2             ;// two rows done; the flags are consumed by the BGT below
232                VMLAL  qRow0b, dRow0b, dDCoeff          ;// out1 += D * row 2 (offset +1)
233                VMLAL  qRow0a, dRow1b, dDCoeff          ;// out0 += D * 1b
234                ;// Narrow each 16-bit sum to 8 bits: rounding shift right by 6 with saturation
235                VQRSHRN dOutRow1, qOutRow1, #6
236                VQRSHRN dOutRow0, qOutRow0, #6
237
238                VST1   dOutRow0U32[0], [pDst], iDstStep ;// store 32-bit lane 0 of out0; pDst += iDstStep
239                VST1   dOutRow1U32[0], [pDst], iDstStep ;// store 32-bit lane 0 of out1; pDst += iDstStep
240
241                BGT     WidthIs4MVIsNotZero             ;// repeat while iHeight is still greater than zero
242                MOV     return,  #OMX_Sts_NoErr
243                M_EXIT                                  ;// NOTE(review): macro defined outside this file; presumably restores the saved registers and returns
244
245WidthIs2MVIsNotZero                                  ;// Weighted loop, two output rows per pass; all 8 lanes are computed but only 2 bytes per row are stored
246                ;// On entry to each pass D0/D1 hold row 0 (offset 0 and offset +1)
247                VLD1   dRow1a, [pSrc], Step1            ;// row 1 at offset 0
248                VMULL  qRow0a, dRow0a, dACoeff          ;// out0  = A * 0a
249                VMULL  qRow0b, dRow1a, dACoeff          ;// out1  = A * 1a
250                VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// row 1 at offset +1
251                VMLAL  qRow0a, dRow0b, dBCoeff          ;// out0 += B * 0b
252                VMLAL  qRow0b, dRow1b, dBCoeff          ;// out1 += B * 1b
253                VLD1   dRow0a, [pSrc], Step1            ;// row 2 at offset 0; doubles as row 0 of the next pass
254                VMLAL  qRow0a, dRow1a, dCCoeff          ;// out0 += C * 1a
255                VMLAL  qRow0b, dRow0a, dCCoeff          ;// out1 += C * row 2 (offset 0)
256                VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// row 2 at offset +1
257                SUBS   iHeight, iHeight, #2             ;// two rows done; the flags are consumed by the BGT below
258                VMLAL  qRow0b, dRow0b, dDCoeff          ;// out1 += D * row 2 (offset +1)
259                VMLAL  qRow0a, dRow1b, dDCoeff          ;// out0 += D * 1b
260                ;// Narrow each 16-bit sum to 8 bits: rounding shift right by 6 with saturation
261                VQRSHRN dOutRow1, qOutRow1, #6
262                VQRSHRN dOutRow0, qOutRow0, #6
263
264                VST1   dOutRow0U16[0], [pDst], iDstStep ;// store 16-bit lane 0 of out0; pDst += iDstStep
265                VST1   dOutRow1U16[0], [pDst], iDstStep ;// store 16-bit lane 0 of out1; pDst += iDstStep
266
267                BGT     WidthIs2MVIsNotZero             ;// repeat while iHeight is still greater than zero
268                MOV     return,  #OMX_Sts_NoErr
269                M_EXIT                                  ;// NOTE(review): macro defined outside this file; presumably restores the saved registers and returns
270
271;// If the fractional MV is (0, 0)
272WidthIs8MVIsZero                                     ;// Copy-only path, 8 bytes wide: no weighting, rows are stored as loaded
273                SUB     pSrc, pSrc, iSrcStep            ;// undo the 1 + (iSrcStep - 1) advance made by the two preloads in the entry code
274                ;// Each pass copies two rows of 8 bytes
275WidthIs8LoopMVIsZero
276                VLD1    dRow0a, [pSrc], iSrcStep        ;// first row of the pair into D0; pSrc += iSrcStep
277                SUBS    iHeight, iHeight, #2            ;// two rows per pass; the flags are consumed by the BGT below
278                VLD1    dRow0b, [pSrc], iSrcStep        ;// second row of the pair into D1; pSrc += iSrcStep
279                VST1    dOut0U64, [pDst], iDstStep      ;// store D0 (8 bytes); pDst += iDstStep
280                VST1    dOut1U64, [pDst], iDstStep      ;// store D1 (8 bytes); pDst += iDstStep
281                BGT     WidthIs8LoopMVIsZero            ;// repeat while iHeight is still greater than zero
282
283                MOV     return,  #OMX_Sts_NoErr
284                M_EXIT                                  ;// NOTE(review): macro defined outside this file; presumably restores the saved registers and returns
285
286WidthIs4MVIsZero                                     ;// Copy-only path, 4 bytes per row, two rows per pass; D0 already holds the first row of the pair
287                VLD1    dRow0b, [pSrc], iSrcStep        ;// second row of the pair into D1; pSrc += iSrcStep
288
289                SUBS    iHeight, iHeight, #2            ;// two rows per pass; the flags are consumed by the BGT below
290
291                VST1    dOut00U32[0], [pDst], iDstStep  ;// store 32-bit lane 0 of D0 (first row of the pair)
292                VLD1    dRow0a, [pSrc], iSrcStep        ;// preload the next pass's first row into D0; on the final pass this data is never stored
293                VST1    dOut01U32[0], [pDst], iDstStep  ;// store 32-bit lane 0 of D1 (second row of the pair)
294
295                BGT     WidthIs4MVIsZero                ;// repeat while iHeight is still greater than zero
296                MOV     return,  #OMX_Sts_NoErr
297                M_EXIT                                  ;// NOTE(review): macro defined outside this file; presumably restores the saved registers and returns
298
299WidthIs2MVIsZero                                     ;// Copy-only path, 2 bytes per row, two rows per pass; D0 already holds the first row of the pair
300                VLD1    dRow0b, [pSrc], iSrcStep        ;// second row of the pair into D1; pSrc += iSrcStep
301                SUBS    iHeight, iHeight, #2            ;// two rows per pass; the flags are consumed by the BGT below
302
303                VST1    dOut0U16[0], [pDst], iDstStep   ;// store 16-bit lane 0 of D0 (first row of the pair)
304                VLD1    dRow0a, [pSrc], iSrcStep        ;// preload the next pass's first row into D0; on the final pass this data is never stored
305                VST1    dOut1U16[0], [pDst], iDstStep   ;// store 16-bit lane 0 of D1 (second row of the pair)
306
307                BGT     WidthIs2MVIsZero                ;// repeat while iHeight is still greater than zero
308                MOV     return,  #OMX_Sts_NoErr
309                M_END                                   ;// NOTE(review): macro defined outside this file; presumably emits the final return and closes the function opened by M_START
310
311        ENDIF ;// CortexA8
312
313        END
314
315;//-----------------------------------------------------------------------------------------------
316;// armVCM4P10_Interpolate_Chroma_asm ends
317;//-----------------------------------------------------------------------------------------------
318
319