1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27
28        INCLUDE omxtypes_s.h
29        INCLUDE armCOMM_s.h
30
31        M_VARIANTS ARM1136JS
32
33    IF ARM1136JS
34
35;// input registers
;// r0-r3 are used under these names from function entry onwards; iWidth,
;// iHeight, dx and dy are loaded into r4-r7 from the stack arguments by
;// the function body.
36
37pSrc                 RN 0     ;// current read position in the upper source row
38iSrcStep             RN 1     ;// byte distance between source rows (r1 is reused as pSrc1)
39pDst                 RN 2     ;// current write position in the upper destination row
40iDstStep             RN 3     ;// byte distance between destination rows
41iWidth               RN 4     ;// block width; bits 31:16 later carry the inner loop counter
42iHeight              RN 5     ;// rows still to produce; decremented by 2 per outer iteration
43dx                   RN 6     ;// weight of the right-hand neighbour (r6 is reused as BACoeff)
44dy                   RN 7     ;// weight of the neighbour below (r7 is reused as DCCoeff)
45
46
47;// local variable registers
;// Several names below are aliases of the same physical register (notably
;// r8, r10, r12 and r14). The instruction order in the function body relies
;// on each value being dead before its register is reused under another name.
48temp                 RN 11    ;// scratch holding iWidth - 1 while the loop counter is packed
49r0x20                RN 12    ;// rounding constant 32 for the upper output row
50tmp0x20              RN 14    ;// rounding constant 32 for the lower output row (r14 is the link register)
51return               RN 0     ;// status code placed in r0 before returning
52dxPlusdy             RN 10    ;// NOTE(review): not referenced by any instruction visible in this file
53EightMinusdx         RN 8     ;// 8 - dx
54EightMinusdy         RN 9     ;// 8 - dy
55dxEightMinusdx       RN 8     ;// (dx << 16) + (8 - dx); replaces EightMinusdx in r8
56BACoeff              RN 6     ;// dxEightMinusdx * (8 - dy): B in the high half, A in the low half
57DCCoeff              RN 7     ;// dxEightMinusdx * dy:       D in the high half, C in the low half
58
59iDstStepx2MinusWidth RN 8     ;// 2 * iDstStep - iWidth, destination advance at the end of a row pair
60iSrcStepx2MinusWidth RN 9     ;// 2 * iSrcStep1 - iWidth, source advance at the end of a row pair
61iSrcStep1            RN 10    ;// pSrc1 - pSrc, the source row distance recovered from the two pointers
62
63pSrc1                RN 1     ;// read position in the lower source row (pSrc + iSrcStep)
64pSrc2                RN 8     ;// third source row, computed as 2 * pSrc1 - pSrc
65pDst1                RN 8     ;// lower destination row pointer in the interpolating path
66pDst2                RN 12    ;// lower destination row pointer in the copy path
67
;// Copy path: pixRC is the source byte at row R, column C of a 2x2 tile.
68pix00                RN 8
69pix01                RN 9
70pix10                RN 10
71pix11                RN 11
72
;// Copy path: two bytes packed as (right << 8) | left for one halfword store.
73Out0100              RN 8
74Out1110              RN 10
75
;// Interpolating path: xRC is the source byte at row R, column C of the
;// 3x3 neighbourhood needed for a 2x2 output tile.
76x00                  RN 8
77x01                  RN 10
78x02                  RN 12
79x10                  RN 9
80x11                  RN 11
81x12                  RN 14
82x20                  RN 10
83x21                  RN 12
84x22                  RN 14
85
;// Interpolating path: horizontally adjacent pixels packed into one word,
;// left pixel in the low half and right pixel in the high half. After the
;// SMLAD accumulation the same registers hold partial sums.
86x01x00               RN 8
87x02x01               RN 10
88x11x10               RN 9
89x12x11               RN 11
90x21x20               RN 10
91x22x21               RN 12
92
;// Interpolating path: OutRowRC is the result for output row R, column C.
93OutRow00             RN 12
94OutRow01             RN 14
95OutRow10             RN 10
96OutRow11             RN 12
97
;// Interpolating path: two results packed as (right << 8) | left for one
;// halfword store. Both names map to r12.
98OutRow0100           RN 12
99OutRow1110           RN 12
100
101;//-----------------------------------------------------------------------------------------------
102;// armVCM4P10_Interpolate_Chroma_asm starts
103;//-----------------------------------------------------------------------------------------------
;//
;// Writes an iWidth x iHeight block of bytes at pDst, computed from the
;// source bytes at pSrc.
;//
;// Inputs:
;//   r0 = pSrc, r1 = iSrcStep, r2 = pDst, r3 = iDstStep
;//   stack arguments, 4 bytes each, in this order: Width, Height, Dx, Dy
;// Output:
;//   r0 = OMX_Sts_NoErr (the only status this routine returns)
;//
;// If dx + dy == 0 the source bytes are copied unchanged. Otherwise every
;// output byte is
;//       (A*x00 + B*x01 + C*x10 + D*x11 + 32) >> 6
;// where x00 is the source byte at the output position, x01 its right-hand
;// neighbour, x10 the byte below, x11 the byte below and to the right, and
;//       A = (8-dx)*(8-dy)   B = dx*(8-dy)   C = (8-dx)*dy   D = dx*dy
;//
;// Both paths produce a 2x2 tile of output bytes per inner iteration and two
;// rows per outer iteration, and both loops run at least once.
;// NOTE(review): this presumes iWidth and iHeight are even and >= 2, and
;// that dx and dy are non-negative -- confirm against the callers.
;// NOTE(review): results are written with halfword stores; presumably pDst
;// and iDstStep are 2-byte aligned -- confirm.
;// The interpolating path reads one source column to the right of and one
;// source row below the block, including when only one of dx, dy is zero.
104
105        ;// Write function header
        ;// NOTE(review): M_START comes from armCOMM_s.h; the r11 operand
        ;// presumably requests saving of registers up to r11 -- confirm there.
106        M_START armVCM4P10_Interpolate_Chroma, r11
107
108        ;// Define stack arguments
109        M_ARG   Width,      4
110        M_ARG   Height,     4
111        M_ARG   Dx,         4
112        M_ARG   Dy,         4
113
114        ;// Load arguments from the stack
115        ;// M_STALL ARM1136JS=4
116
117        M_LDR   iWidth,  Width
118        M_LDR   iHeight, Height
119        M_LDR   dx,      Dx
120        M_LDR   dy,      Dy
121
122        ;// EightMinusdx = 8 - dx
123        ;// EightMinusdy = 8 - dy
124
125        ;// ACoeff = EightMinusdx * EightMinusdy
126        ;// BCoeff = dx * EightMinusdy
127        ;// CCoeff = EightMinusdx * dy
128        ;// DCoeff = dx * dy
129
130        ADD     pSrc1, pSrc, iSrcStep                       ;// lower source row; overwrites iSrcStep in r1
131        SUB     temp, iWidth, #1                            ;// initial inner loop counter = iWidth - 1
132        RSB     EightMinusdx, dx, #8
133        RSB     EightMinusdy, dy, #8
134        CMN     dx,dy                                       ;// flags from dx + dy; Z is consumed by the BEQ below
135        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16   ;// high half = dx, low half = 8 - dx
136        ORR     iWidth, iWidth, temp, LSL #16               ;// iWidth = ((iWidth - 1) << 16) | iWidth
137
138        ;// Packed Coeffs.
        ;// BACoeff = B:A and DCCoeff = D:C (high half : low half).
        ;// MUL without the S suffix leaves the flags set by CMN untouched.
139
140        MUL     BACoeff, dxEightMinusdx, EightMinusdy
141        MUL     DCCoeff, dxEightMinusdx, dy
142
143
144        ;// Take the copy path when dx + dy == 0 (flags from the CMN above)
145
146        BEQ     MVIsZero
147
148;// Pixel layout:
149;//
150;//   x00 x01 x02
151;//   x10 x11 x12
152;//   x20 x21 x22
153
154;// If fractional mv is not (0, 0)
155
156OuterLoopMVIsNotZero
157
158InnerLoopMVIsNotZero
159
                ;// Load the upper two rows of the 3x3 neighbourhood. The
                ;// writeback on the last two loads advances pSrc and pSrc1
                ;// by 2, ready for the next 2x2 tile.
160                LDRB    x00, [pSrc,  #+0]
161                LDRB    x10, [pSrc1, #+0]
162                LDRB    x01, [pSrc,  #+1]
163                LDRB    x11, [pSrc1, #+1]
164                LDRB    x02, [pSrc,  #+2]!
165                LDRB    x12, [pSrc1, #+2]!
166
                ;// Pack horizontal pixel pairs, left pixel in the low half.
                ;// x01x00 must be formed before x02x01, which overwrites x01 (r10).
167                ORR     x01x00, x00, x01, LSL #16
168                ;// M_STALL ARM1136JS=1
169                ORR     x02x01, x01, x02, LSL #16
170                MOV     r0x20,  #32                         ;// rounding term; reuses r12 after the last read of x02
171                ORR     x11x10, x10, x11, LSL #16
172                ORR     x12x11, x11, x12, LSL #16
173
                ;// Upper-row contribution for both output columns:
                ;//   x01x00 = A*x00 + B*x01 + 32
                ;//   x02x01 = A*x01 + B*x02 + 32
174                SMLAD   x01x00, x01x00, BACoeff, r0x20
175                SMLAD   x02x01, x02x01, BACoeff, r0x20
176
177                ;// iWidth packed with MSB (top 16 bits)
178                ;// as inner loop counter value i.e
179                ;// (iWidth -1) and LSB (lower 16 bits)
180                ;// as original width
                ;// Subtracting 1<<17 takes 2 off the counter field. The
                ;// resulting flags are tested by the BGT at the end of the
                ;// loop; no instruction in between updates N, Z or V.
181
182                SUBS    iWidth, iWidth, #1<<17
183
                ;// Add the lower-row contribution (C*x10 + D*x11, and
                ;// C*x11 + D*x12) to complete the upper output row.
184                SMLAD   OutRow00, x11x10, DCCoeff, x01x00
185                SMLAD   OutRow01, x12x11, DCCoeff, x02x01
186
187                RSB     pSrc2, pSrc, pSrc1, LSL #1          ;// pSrc2 = 2*pSrc1 - pSrc, i.e. one row below pSrc1
188
189                MOV     OutRow00, OutRow00, LSR #6
190                MOV     OutRow01, OutRow01, LSR #6
191
                ;// pSrc2 is derived from the already advanced pointers, so
                ;// the third row is addressed with offsets -2, -1 and 0.
192                LDRB    x20,[pSrc2, #-2]
193
                ;// Pack the two results into one halfword and store them;
                ;// pDst advances by 2.
194                ORR     OutRow0100, OutRow00, OutRow01, LSL #8
195                STRH    OutRow0100, [pDst], #2
196
197                LDRB    x21,[pSrc2, #-1]
198                LDRB    x22,[pSrc2, #+0]
199
200                ADD     pDst1, pDst, iDstStep               ;// lower destination row; reuses r8 after the last use of pSrc2
201
202                ;// M_STALL ARM1136JS=1
203
204                ORR     x21x20, x20, x21, LSL #16
205                ORR     x22x21, x21, x22, LSL #16
206
207                MOV     tmp0x20, #32                        ;// rounding term; reuses r14 after the last read of x22
208
209                ;// Reusing the packed data x11x10 and x12x11
                ;// For the lower output row the middle source row takes
                ;// the A/B weights and the third row takes the C/D weights.
210
211                SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20
212                SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20
213                SMLAD   OutRow10, x21x20, DCCoeff, x11x10
214                SMLAD   OutRow11, x22x21, DCCoeff, x12x11
215
216                MOV     OutRow10, OutRow10, LSR #6
217                MOV     OutRow11, OutRow11, LSR #6
218
219                ;// M_STALL ARM1136JS=1
220
221                ORR     OutRow1110, OutRow10, OutRow11, LSL #8
222
                ;// pDst was already advanced by 2 before pDst1 was formed,
                ;// hence the -2 offset.
223                STRH    OutRow1110, [pDst1, #-2]
224
225                BGT     InnerLoopMVIsNotZero                ;// flags from SUBS iWidth above
226
                ;// End of a row pair. The flags set by SUBS iHeight are kept
                ;// until the BGT below, since nothing in between sets flags.
227                SUBS    iHeight, iHeight, #2
228                ADD     iWidth, iWidth, #1<<16              ;// counter field ended at -1 (even width presumed); this leaves plain iWidth
229                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
230                SUB     iSrcStep1, pSrc1, pSrc              ;// recover the source row distance (r1 no longer holds iSrcStep)
231                SUB     temp, iWidth, #1
232                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
233                ADD     pDst, pDst, iDstStepx2MinusWidth    ;// back to the row start, then two rows down
234                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
235                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
236                ORR     iWidth, iWidth, temp, LSL #16       ;// re-arm the inner loop counter
237                BGT     OuterLoopMVIsNotZero                ;// flags from SUBS iHeight above
238                MOV     return,  #OMX_Sts_NoErr
                ;// NOTE(review): M_EXIT comes from armCOMM_s.h; presumably it
                ;// returns to the caller from this mid-function point -- confirm there.
239                M_EXIT
240
241;// If fractional mv is (0, 0)
242
243MVIsZero
244                ;// M_STALL ARM1136JS=4
245OuterLoopMVIsZero
246
247InnerLoopMVIsZero
248
                ;// Copy a 2x2 tile: two bytes from each of the two source
                ;// rows, with the source pointers post-incremented by 1 per load.
249                LDRB    pix00, [pSrc],  #+1
250                LDRB    pix01, [pSrc],  #+1
251                LDRB    pix10, [pSrc1], #+1
252                LDRB    pix11, [pSrc1], #+1
253
254                ADD     pDst2,  pDst, iDstStep              ;// lower destination row, recomputed every iteration
255                SUBS    iWidth, iWidth, #1<<17              ;// counter field -= 2; flags tested by the BGT below
256
257                ORR     Out0100, pix00, pix01, LSL #8
258                ORR     Out1110, pix10, pix11, LSL #8
259
260                STRH    Out0100, [pDst],  #2
261                STRH    Out1110, [pDst2], #2                ;// the writeback to pDst2 is not used; it is recomputed above
262
263                BGT     InnerLoopMVIsZero
264
                ;// End of a row pair: same pointer and counter bookkeeping
                ;// as in the interpolating path.
265                SUBS    iHeight, iHeight, #2
266                ADD     iWidth, iWidth, #1<<16              ;// counter field ended at -1 (even width presumed); this leaves plain iWidth
267                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
268                SUB     iSrcStep1, pSrc1, pSrc              ;// recover the source row distance from the two pointers
269                SUB     temp, iWidth, #1
270                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
271                ADD     pDst, pDst, iDstStepx2MinusWidth
272                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
273                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
274                ORR     iWidth, iWidth, temp, LSL #16       ;// re-arm the inner loop counter
275                BGT     OuterLoopMVIsZero                   ;// flags from SUBS iHeight above
276                MOV     return,  #OMX_Sts_NoErr
                ;// NOTE(review): M_END comes from armCOMM_s.h; presumably it
                ;// emits the function epilogue and return -- confirm there.
277                M_END
278
279        ENDIF ;// ARM1136JS
280
281
282        END
283
284;//-----------------------------------------------------------------------------------------------
285;// armVCM4P10_Interpolate_Chroma_asm ends
286;//-----------------------------------------------------------------------------------------------
287
288