1;//
2;//
3;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   9641
6;// Date:       Thursday, February 7, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13
14        INCLUDE omxtypes_s.h
15        INCLUDE armCOMM_s.h
16
17        M_VARIANTS ARM1136JS
18
19    IF ARM1136JS
20
21;// input registers
;//
;// Every name in this table is an RN alias for a core register number.
;// Many aliases share one physical register, so a name is only valid from
;// the instruction that writes it to the last instruction that reads it.
;// pSrc, iSrcStep, pDst and iDstStep are used by the code without being
;// loaded, i.e. they are expected in r0-r3 on entry. iWidth, iHeight, dx
;// and dy are filled from stack arguments with M_LDR by the function body.
22
23pSrc                 RN 0
24iSrcStep             RN 1    ;// shares r1 with pSrc1 (overwritten once pSrc1 is formed)
25pDst                 RN 2
26iDstStep             RN 3
27iWidth               RN 4    ;// later also carries a loop counter in its top 16 bits
28iHeight              RN 5
29dx                   RN 6    ;// shares r6 with BACoeff
30dy                   RN 7    ;// shares r7 with DCCoeff
31
32
33;// local variable registers
34temp                 RN 11
35r0x20                RN 12   ;// holds the constant 32 (0x20)
36tmp0x20              RN 14   ;// second holder of the constant 32; r14 is used as scratch
37return               RN 0    ;// status value; shares r0 with pSrc
38dxPlusdy             RN 10   ;// not referenced by the visible code
39EightMinusdx         RN 8
40EightMinusdy         RN 9
41dxEightMinusdx       RN 8    ;// packed (dx << 16) + (8 - dx); replaces EightMinusdx in r8
42BACoeff              RN 6    ;// packed pair of 16-bit weights, B in the top half, A in the bottom
43DCCoeff              RN 7    ;// packed pair of 16-bit weights, D in the top half, C in the bottom
44
;// Row-advance temporaries used at the end of each outer-loop pass
45iDstStepx2MinusWidth RN 8
46iSrcStepx2MinusWidth RN 9
47iSrcStep1            RN 10   ;// source step recomputed as pSrc1 - pSrc
48
;// Row pointers
49pSrc1                RN 1    ;// source row below pSrc
50pSrc2                RN 8    ;// source row below pSrc1
51pDst1                RN 8    ;// destination row below pDst (non-zero path)
52pDst2                RN 12   ;// destination row below pDst (zero path)
53
;// Source bytes for the zero-fraction (copy) path: pix<row><column>
54pix00                RN 8
55pix01                RN 9
56pix10                RN 10
57pix11                RN 11
58
;// Packed output byte pairs for the copy path
59Out0100              RN 8
60Out1110              RN 10
61
;// Source bytes for the interpolating path: x<row><column>
62x00                  RN 8
63x01                  RN 10
64x02                  RN 12
65x10                  RN 9
66x11                  RN 11
67x12                  RN 14
68x20                  RN 10
69x21                  RN 12
70x22                  RN 14
71
;// Two horizontally adjacent source bytes packed as (right << 16) | left
72x01x00               RN 8
73x02x01               RN 10
74x11x10               RN 9
75x12x11               RN 11
76x21x20               RN 10
77x22x21               RN 12
78
;// Interpolated results: OutRow<row><column>
79OutRow00             RN 12
80OutRow01             RN 14
81OutRow10             RN 10
82OutRow11             RN 12
83
;// Two results of one row packed as (right << 8) | left, ready for STRH
84OutRow0100           RN 12
85OutRow1110           RN 12
86
87;//-----------------------------------------------------------------------------------------------
88;// armVCM4P10_Interpolate_Chroma_asm starts
89;//-----------------------------------------------------------------------------------------------
;//
;// Purpose:
;//   Writes an iWidth x iHeight block of bytes to pDst, processing a 2x2
;//   group of outputs per inner-loop iteration.
;//   If dx + dy != 0 every output byte is
;//       (A*p00 + B*p01 + C*p10 + D*p11 + 32) >> 6
;//   where p00/p01 are horizontally adjacent source bytes, p10/p11 are the
;//   bytes one source row below, and
;//       A = (8-dx)*(8-dy)   B = dx*(8-dy)   C = (8-dx)*dy   D = dx*dy
;//   If dx + dy == 0 the source bytes are copied unchanged.
;//
;// Inputs:
;//   r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep
;//   stack: Width, Height, Dx, Dy (declared as 4 bytes each)
;// Output:
;//   r0 = OMX_Sts_NoErr
;//
;// Notes for maintainers:
;//   * Both loops step by 2 and are tested after the body, so width and
;//     height are assumed to be even and greater than 0 (not checked here).
;//   * The coefficient packing and the byte-wide result assume dx and dy
;//     are in 0..7 - TODO confirm against the caller.
;//   * In the interpolating path each 2x2 output group reads a 3x3 source
;//     neighbourhood, so the last group reads one column and one row past
;//     the iWidth x iHeight source area.
;//   * No instruction between a flag-setting instruction (CMN / SUBS) and
;//     its branch has an S suffix; keep it that way when editing.
90
91        ;// Write function header
92        M_START armVCM4P10_Interpolate_Chroma, r11  ;// macro from the included headers; r11 is presumably the highest register to preserve - confirm in armCOMM_s.h
93
94        ;// Define stack arguments
95        M_ARG   Width,      4
96        M_ARG   Height,     4
97        M_ARG   Dx,         4
98        M_ARG   Dy,         4
99
100        ;// Load argument from the stack
101        ;// M_STALL ARM1136JS=4
102
103        M_LDR   iWidth,  Width
104        M_LDR   iHeight, Height
105        M_LDR   dx,      Dx
106        M_LDR   dy,      Dy
107
108        ;// EightMinusdx = 8 - dx
109        ;// EightMinusdy = 8 - dy
110
111        ;// ACoeff = EightMinusdx * EightMinusdy
112        ;// BCoeff = dx * EightMinusdy
113        ;// CCoeff = EightMinusdx * dy
114        ;// DCoeff = dx * dy
115
116        ADD     pSrc1, pSrc, iSrcStep  ;// pSrc1 = next source row; overwrites iSrcStep (both are r1)
117        SUB     temp, iWidth, #1  ;// temp = iWidth - 1 (inner-loop counter start value)
118        RSB     EightMinusdx, dx, #8
119        RSB     EightMinusdy, dy, #8
120        CMN     dx,dy  ;// flags from dx + dy; consumed by the BEQ below
121        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16  ;// r8 = (dx << 16) + (8 - dx)
122        ORR     iWidth, iWidth, temp, LSL #16  ;// iWidth = ((iWidth - 1) << 16) | iWidth
123
124        ;// Packed Coeffs.
125
126        MUL     BACoeff, dxEightMinusdx, EightMinusdy  ;// (B << 16) + A; overwrites dx (r6)
127        MUL     DCCoeff, dxEightMinusdx, dy  ;// (D << 16) + C; overwrites dy (r7)
128
129
130        ;// Checking either of dx and dy being non-zero
131
132        BEQ     MVIsZero  ;// taken when dx + dy == 0 (flags still from the CMN above)
133
134;// Pixel layout:
135;//
136;//   x00 x01 x02
137;//   x10 x11 x12
138;//   x20 x21 x22
139
140;// If fractional mv is not (0, 0)
141
142OuterLoopMVIsNotZero
143
144InnerLoopMVIsNotZero
145
146                LDRB    x00, [pSrc,  #+0]
147                LDRB    x10, [pSrc1, #+0]
148                LDRB    x01, [pSrc,  #+1]
149                LDRB    x11, [pSrc1, #+1]
150                LDRB    x02, [pSrc,  #+2]!  ;// pre-indexed with writeback: pSrc += 2
151                LDRB    x12, [pSrc1, #+2]!  ;// pre-indexed with writeback: pSrc1 += 2
152
153                ORR     x01x00, x00, x01, LSL #16  ;// (x01 << 16) | x00
154                ;// M_STALL ARM1136JS=1
155                ORR     x02x01, x01, x02, LSL #16  ;// (x02 << 16) | x01; last read of x02 (r12)
156                MOV     r0x20,  #32  ;// rounding constant, placed in r12 now that x02 is packed
157                ORR     x11x10, x10, x11, LSL #16  ;// (x11 << 16) | x10
158                ORR     x12x11, x11, x12, LSL #16  ;// (x12 << 16) | x11
159
160                SMLAD   x01x00, x01x00, BACoeff, r0x20  ;// 32 + A*x00 + B*x01
161                SMLAD   x02x01, x02x01, BACoeff, r0x20  ;// 32 + A*x01 + B*x02
162
163                ;// iWidth packed with MSB (top 16 bits)
164                ;// as inner loop counter value i.e
165                ;// (iWidth -1) and LSB (lower 16 bits)
166                ;// as original width
167
168                SUBS    iWidth, iWidth, #1<<17  ;// counter (top half) -= 2; flags consumed by the BGT at the loop bottom
169
170                SMLAD   OutRow00, x11x10, DCCoeff, x01x00  ;// + C*x10 + D*x11
171                SMLAD   OutRow01, x12x11, DCCoeff, x02x01  ;// + C*x11 + D*x12
172
173                RSB     pSrc2, pSrc, pSrc1, LSL #1  ;// pSrc2 = 2*pSrc1 - pSrc = pSrc1 + (pSrc1 - pSrc), the row below pSrc1
174
175                MOV     OutRow00, OutRow00, LSR #6
176                MOV     OutRow01, OutRow01, LSR #6
177
178                LDRB    x20,[pSrc2, #-2]  ;// offsets are -2..0 because pSrc/pSrc1 were already advanced by 2
179
                ;// No masking or saturation is applied when packing; this assumes each
                ;// result fits in a byte (holds when the four weights sum to 64) -
                ;// presumably guaranteed by dx, dy in 0..7; confirm against caller.
180                ORR     OutRow0100, OutRow00, OutRow01, LSL #8  ;// left result in the low byte (assumes little-endian data access)
181                STRH    OutRow0100, [pDst], #2  ;// store two bytes of row 0; pDst += 2
182
183                LDRB    x21,[pSrc2, #-1]  ;// r12 is free again after the STRH above
184                LDRB    x22,[pSrc2, #+0]  ;// last use of pSrc2 (r8)
185
186                ADD     pDst1, pDst, iDstStep  ;// row-1 destination; pDst is already 2 past this pair
187
188                ;// M_STALL ARM1136JS=1
189
190                ORR     x21x20, x20, x21, LSL #16  ;// (x21 << 16) | x20
191                ORR     x22x21, x21, x22, LSL #16  ;// (x22 << 16) | x21; last read of x22 (r14)
192
193                MOV     tmp0x20, #32  ;// rounding constant in r14 (r12 now holds x22x21)
194
195                ;// Reusing the packed data x11x10 and x12x11
196
197                SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20  ;// 32 + A*x10 + B*x11
198                SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20  ;// 32 + A*x11 + B*x12
199                SMLAD   OutRow10, x21x20, DCCoeff, x11x10  ;// + C*x20 + D*x21
200                SMLAD   OutRow11, x22x21, DCCoeff, x12x11  ;// + C*x21 + D*x22
201
202                MOV     OutRow10, OutRow10, LSR #6
203                MOV     OutRow11, OutRow11, LSR #6
204
205                ;// M_STALL ARM1136JS=1
206
207                ORR     OutRow1110, OutRow10, OutRow11, LSL #8
208
209                STRH    OutRow1110, [pDst1, #-2]  ;// -2 compensates for the post-increment of pDst above
210
211                BGT     InnerLoopMVIsNotZero  ;// uses flags from the SUBS on iWidth
212
                ;// End of a row pair: restore iWidth, move all pointers down two
                ;// rows and back to the first column, then re-arm the counter.
213                SUBS    iHeight, iHeight, #2  ;// flags consumed by the BGT at the end of this sequence
214                ADD     iWidth, iWidth, #1<<16  ;// top half is -1 after the inner loop (even width assumed); this leaves the plain width
215                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// 2*iDstStep - iWidth
216                SUB     iSrcStep1, pSrc1, pSrc  ;// recover the source step (r1 no longer holds it)
217                SUB     temp, iWidth, #1
218                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1  ;// 2*iSrcStep1 - iWidth
219                ADD     pDst, pDst, iDstStepx2MinusWidth
220                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
221                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
222                ORR     iWidth, iWidth, temp, LSL #16  ;// iWidth = ((iWidth - 1) << 16) | iWidth
223                BGT     OuterLoopMVIsNotZero  ;// uses flags from the SUBS on iHeight
224                MOV     return,  #OMX_Sts_NoErr
225                M_EXIT  ;// early function exit; expansion is defined by the included macros (not shown here)
226
227;// If fractional mv is (0, 0)
228
229MVIsZero
230                ;// M_STALL ARM1136JS=4
231OuterLoopMVIsZero
232
233InnerLoopMVIsZero
234
                ;// Copy a 2x2 group of bytes; each load post-increments its pointer by 1.
235                LDRB    pix00, [pSrc],  #+1
236                LDRB    pix01, [pSrc],  #+1
237                LDRB    pix10, [pSrc1], #+1
238                LDRB    pix11, [pSrc1], #+1
239
240                ADD     pDst2,  pDst, iDstStep  ;// destination row below pDst
241                SUBS    iWidth, iWidth, #1<<17  ;// counter (top half) -= 2; flags consumed by the BGT below
242
243                ORR     Out0100, pix00, pix01, LSL #8  ;// (pix01 << 8) | pix00
244                ORR     Out1110, pix10, pix11, LSL #8  ;// (pix11 << 8) | pix10
245
246                STRH    Out0100, [pDst],  #2  ;// pDst += 2
247                STRH    Out1110, [pDst2], #2
248
249                BGT     InnerLoopMVIsZero
250
                ;// End of a row pair: same pointer and counter update as the
                ;// interpolating path.
251                SUBS    iHeight, iHeight, #2  ;// flags consumed by the BGT at the end of this sequence
252                ADD     iWidth, iWidth, #1<<16  ;// top half is -1 after the inner loop (even width assumed); this leaves the plain width
253                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// 2*iDstStep - iWidth
254                SUB     iSrcStep1, pSrc1, pSrc  ;// recover the source step (r1 no longer holds it)
255                SUB     temp, iWidth, #1
256                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1  ;// 2*iSrcStep1 - iWidth
257                ADD     pDst, pDst, iDstStepx2MinusWidth
258                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
259                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
260                ORR     iWidth, iWidth, temp, LSL #16  ;// iWidth = ((iWidth - 1) << 16) | iWidth
261                BGT     OuterLoopMVIsZero  ;// uses flags from the SUBS on iHeight
262                MOV     return,  #OMX_Sts_NoErr
263                M_END  ;// closes the function opened by M_START; expansion is defined by the included macros (not shown here)
264
265        ENDIF ;// ARM1136JS
266
267
268        END
269
270;//-----------------------------------------------------------------------------------------------
271;// armVCM4P10_Interpolate_Chroma_asm ends
272;//-----------------------------------------------------------------------------------------------
273
274