omxVCM4P10_PredictIntra_4x4_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27
28        INCLUDE omxtypes_s.h
29        INCLUDE armCOMM_s.h
30
31;// Define the processor variants supported by this file
32
33         M_VARIANTS ARM1136JS
34
35;//-------------------------------------------------------
36;// Jump table implementing a C-style switch in asm by
37;// the method of two levels of indexing.
;//
;// Each DCD word holds the address of one prediction-mode
;// handler label defined later in this file. The function
;// entry code loads pc from [table + predMode*4], so the
;// order of the entries below decides which predMode value
;// reaches which handler (entry 0 = OMX_VC_4x4_VERT, ...,
;// entry 8 = OMX_VC_4x4_HU). The table has exactly nine
;// entries.
38;//-------------------------------------------------------
39
40    M_TABLE armVCM4P10_pSwitchTable4x4
41    DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR
42    DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL
43    DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR
44    DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL
45    DCD  OMX_VC_4x4_HU
46
47    IF ARM1136JS
48
49;//--------------------------------------------
50;// Constants
;//
;// BLK_SIZE   : not referenced by any instruction in the
;//              visible source.
;// MUL_CONST0 : multiplying a value in 0..255 by this word
;//              replicates that value into all four bytes
;//              (used with MUL / MULEQ below).
;// ADD_CONST1 : 0x80 in every byte lane; added with UADD8 as
;//              the final "+128" step of the byte-lane
;//              filter sequence UHADD8 / UHSUB8 / UADD8.
51;//--------------------------------------------
52BLK_SIZE              EQU 0x8
53MUL_CONST0            EQU 0x01010101
54ADD_CONST1            EQU 0x80808080
55
56;//--------------------------------------------
57;// Scratch variable
;//
;// IMPORTANT: these are only names for physical registers and
;// many names share one register, for example
;//   r0 : return, r0x80808080, tVal0 (and the pSrcLeft input)
;//   r1 : r0x01010101, tVal1        (and the pSrcAbove input)
;//   r6 : tVal6, Out0, Left0        (and the predMode input)
;//   r7 : tVal7, Out1, Left1        (and the availability input)
;//   r8 : tVal8, Out2, Left2
;//   r9 : tVal9, Out3, Left3, pTable
;//   r10: tVal10, AboveLeft
;//   r12: tVal12, Above0123
;//   r14: tVal14, Above4567         (r14 is also the link register)
;// Writing one alias destroys every other alias of the same
;// register, so the instruction order inside each handler is
;// significant.
58;//--------------------------------------------
59return          RN 0
60pTable          RN 9
61pc              RN 15
62r0x01010101     RN 1
63r0x80808080     RN 0
64
65tVal0           RN 0
66tVal1           RN 1
67tVal2           RN 2
68tVal4           RN 4
69tVal6           RN 6
70tVal7           RN 7
71tVal8           RN 8
72tVal9           RN 9
73tVal10          RN 10
74tVal11          RN 11
75tVal12          RN 12
76tVal14          RN 14
77
78Out0            RN 6
79Out1            RN 7
80Out2            RN 8
81Out3            RN 9
82
83Left0           RN 6
84Left1           RN 7
85Left2           RN 8
86Left3           RN 9
87
88Above0123       RN 12
89Above4567       RN 14
90
91AboveLeft       RN 10
92
93;//--------------------------------------------
94;// Declare input registers
;//
;// pSrcLeft, pSrcAbove, pSrcAboveLeft and pDst are used in
;// r0-r3 without being loaded by this file. leftStep, dstStep,
;// predMode and availability are loaded into r4-r7 from the
;// stack arguments by the function entry code (M_LDR).
95;//--------------------------------------------
96pSrcLeft        RN 0    ;// input pointer
97pSrcAbove       RN 1    ;// input pointer
98pSrcAboveLeft   RN 2    ;// input pointer
99pDst            RN 3    ;// output pointer
100leftStep        RN 4    ;// input variable
101dstStep         RN 5    ;// input variable
102predMode        RN 6    ;// input variable
103availability    RN 7    ;// input variable
104
105;//-----------------------------------------------------------------------------------------------
106;// omxVCM4P10_PredictIntra_4x4 starts
107;//-----------------------------------------------------------------------------------------------
108
109        ;// Write function header
        ;//
        ;// Entry point of omxVCM4P10_PredictIntra_4x4.
        ;//   registers : r0 = pSrcLeft, r1 = pSrcAbove,
        ;//               r2 = pSrcAboveLeft, r3 = pDst
        ;//   stack     : LeftStep, DstStep, PredMode, Availability
        ;//               (declared below as 4 bytes each)
        ;// The code loads the four stack arguments into r4-r7 and then
        ;// jumps to the handler whose address is stored at
        ;// armVCM4P10_pSwitchTable4x4[predMode]. Every handler writes a
        ;// 4x4 block (four words, dstStep apart) to pDst and sets r0 to
        ;// OMX_Sts_NoErr before leaving through M_EXIT / M_END.
        ;//
        ;// NOTE(review): M_START / M_ARG / M_LDR are macros from
        ;// armCOMM_s.h, which is not visible here. Presumably "r11" makes
        ;// the prologue save r4-r11 and lr; the handlers rely on that
        ;// because they use r4-r11 and r14 as scratch - confirm.
110        M_START omxVCM4P10_PredictIntra_4x4, r11
111
112        ;// Define stack arguments
113        M_ARG    LeftStep,     4
114        M_ARG    DstStep,      4
115        M_ARG    PredMode,     4
116        M_ARG    Availability, 4
117
118        ;// M_STALL ARM1136JS=4
119
120        LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case
121
122        ;// Load argument from the stack
123        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
124        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
125        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
126        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
127
        ;// NOTE(review): predMode is used as a table index without any
        ;// range check; a value outside 0..8 loads pc from memory beyond
        ;// the nine-entry table. Presumably callers guarantee a valid
        ;// mode - confirm.
128        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on predMode
129
130OMX_VC_4x4_VERT
        ;// Vertical prediction: each of the four output rows is a copy of
        ;// the word read from pSrcAbove (pSrcAbove[0..3]). The first three
        ;// stores use the "[pDst], dstStep" form so pDst moves on by
        ;// dstStep after each row. pSrcLeft, pSrcAboveLeft and
        ;// availability are not read in this mode.
131
132        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
133        M_STR    Above0123,  [pDst],  dstStep        ;// pDst[0  to 3]  = Above0123
134        M_STR    Above0123,  [pDst],  dstStep        ;// pDst[4  to 7]  = Above0123
135        M_STR    Above0123,  [pDst],  dstStep        ;// pDst[8  to 11] = Above0123
136        STR      Above0123,  [pDst]                  ;// pDst[12 to 15] = Above0123
137        MOV      return, #OMX_Sts_NoErr
138        M_EXIT                                      ;// Macro to exit midway-break from case
139
139OMX_VC_4x4_HOR
        ;// Horizontal prediction: output row y is the byte read from
        ;// pSrcLeft for that row, replicated into all four bytes.
        ;// pSrcLeft is stepped by leftStep between the byte loads.
        ;// r0x01010101 is r1, so loading the constant discards pSrcAbove,
        ;// which this mode never reads. Left0..Left3 and Out0..Out3 are
        ;// the same registers (r6..r9): each MUL overwrites its own input.
140
141        ;// M_STALL ARM1136JS=6
142
143        LDR      r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times
144        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
145        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
146        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
147        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
148        MUL      Out0,   Left0,   r0x01010101        ;// replicate the val in all the bytes
149        MUL      Out1,   Left1,   r0x01010101        ;// replicate the val in all the bytes
150        MUL      Out2,   Left2,   r0x01010101        ;// replicate the val in all the bytes
151        MUL      Out3,   Left3,   r0x01010101        ;// replicate the val in all the bytes
152        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
153        M_STR    Out1,   [pDst],  dstStep            ;// store {Out1} at pDst [4  to 7 ]
154        M_STR    Out2,   [pDst],  dstStep            ;// store {Out2} at pDst [8  to 11]
155        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
156        MOV      return, #OMX_Sts_NoErr              ;// r0 (pSrcLeft) is dead by now
157        M_EXIT                                       ;// Macro to exit midway-break from case
159
160OMX_VC_4x4_DC
        ;// DC prediction. availability is first reduced to its
        ;// OMX_VC_UPPER and OMX_VC_LEFT bits. If both are set, this path
        ;// fills the whole block with
        ;//   (sum of pSrcAbove[0..3] + sum of the 4 left bytes + 4) >> 3
        ;// Otherwise control goes to UpperOrLeftOrNoneAvailable with the
        ;// masked availability value still in its register.
161
162        ;// M_STALL ARM1136JS=6
163
164        AND      availability,  availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
165        CMP      availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
166        BNE      UpperOrLeftOrNoneAvailable          ;// Jump to Upper if not both
167        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
168
169        ;// M_STALL ARM1136JS=1
170
        ;// Sum the four above bytes: split them into two pairs of
        ;// halfwords, add the pairs, then fold the upper halfword onto
        ;// the lower one.
171        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
172        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
173        UADD16   tVal11, tVal6,   tVal7              ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
        ;// Left0/Left1 are r6/r7 (= tVal6/tVal7); both were consumed by
        ;// the UADD16 above, so they may be reloaded here.
174        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
175        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
176        ADD      tVal11, tVal11,  LSR #16            ;// sum(pSrcAbove[0] to pSrcAbove[3])
177        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
178        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
179        UXTH     tVal11, tVal11                      ;// upsum1 (Clear the top junk bits)
180        ADD      tVal6,  Left0,  Left1               ;// tVal6 = Left0 + Left1
181        ADD      tVal7,  Left2,  Left3               ;// tVal7 = Left2 + Left3
182        ADD      tVal6,  tVal6,  tVal7               ;// tVal6 = tVal6 + tVal7
183        ADD      Out0,   tVal6,  tVal11              ;// Out0  = tVal6 + tVal11
184        ADD      Out0,   Out0,   #4                  ;// Out0  = Out0 + 4
185        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (r1: pSrcAbove no longer needed)
186        MOV      Out0,   Out0,  LSR #3               ;// Out0 = (Out0 + 4)>>3
187
188        ;// M_STALL ARM1136JS=1
189
190        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
191
192        ;// M_STALL ARM1136JS=1
193
194        MOV      return,  #OMX_Sts_NoErr             ;// r0 (pSrcLeft) is dead after the last LDRB
195        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
196        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
197        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
198        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
199        M_EXIT                                       ;// Macro to exit midway-break from case
200
201UpperOrLeftOrNoneAvailable
        ;// DC prediction, reached from OMX_VC_4x4_DC when upper and left
        ;// are not both available. availability holds only the
        ;// OMX_VC_UPPER / OMX_VC_LEFT bits at this point (masked by the
        ;// AND in OMX_VC_4x4_DC). If it equals OMX_VC_UPPER the block is
        ;// filled with (sum of pSrcAbove[0..3] + 2) >> 2; any other value
        ;// continues at LeftOrNoneAvailable.
202        ;// M_STALL ARM1136JS=3
203
204        CMP      availability,  #OMX_VC_UPPER        ;// if(availability == OMX_VC_UPPER) after masking
205        BNE      LeftOrNoneAvailable                 ;// Jump to Left if not upper
206        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
207
208        ;// M_STALL ARM1136JS=3
209
210        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
211        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
212        UADD16   Out0,   tVal6,  tVal7               ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
213        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (r1: pSrcAbove already consumed)
214        ADD      Out0,   Out0,   LSR #16             ;// sum(pSrcAbove[0] to pSrcAbove[3])
215
216        ;// M_STALL ARM1136JS=1
217
218        UXTH     Out0,   Out0                        ;// upsum1 (Clear the top junk bits)
219        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
220
221        ;// M_STALL ARM1136JS=1
222
223        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
224
225        ;// M_STALL ARM1136JS=1
226
227        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
228
229        ;// M_STALL ARM1136JS=1
230
231        MOV      return, #OMX_Sts_NoErr
232        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
233        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
234        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
235        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
236
237        M_EXIT                                       ;// Macro to exit midway-break from case
238
239LeftOrNoneAvailable
        ;// DC prediction, reached when the masked availability is not
        ;// OMX_VC_UPPER and not OMX_VC_UPPER + OMX_VC_LEFT. If it equals
        ;// OMX_VC_LEFT the block is filled with
        ;// (sum of the 4 left bytes + 2) >> 2; otherwise control goes to
        ;// NoneAvailable. The replicate constant is loaded before the
        ;// branch because NoneAvailable uses it as well.
240        ;// M_STALL ARM1136JS=3
241
242        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
243        CMP      availability, #OMX_VC_LEFT
244        BNE      NoneAvailable
245        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
246        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
247        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
248        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
249        ADD      Out0,   Left0,  Left1               ;// Out0  = Left0 + Left1
250
251        ;// M_STALL ARM1136JS=1
252
253        ADD      Out1,   Left2,  Left3               ;// Out1  = Left2 + Left3
254        ADD      Out0,   Out0,   Out1                ;// Out0  = Out0  + Out1
255        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
256
257        ;// M_STALL ARM1136JS=1
258
259        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
260
261        ;// M_STALL ARM1136JS=1
262
263        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
264
265        ;// M_STALL ARM1136JS=1
266
267        MOV      return, #OMX_Sts_NoErr
268        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
269        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
270        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
271        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
272        M_EXIT                                       ;// Macro to exit midway-break from case
273
274NoneAvailable
        ;// DC prediction with neither upper nor left available: every
        ;// output byte is 128. Expects r0x01010101 to hold 0x01010101
        ;// already (loaded in LeftOrNoneAvailable before the branch here).
        ;// No source pointer is read on this path.
275        MOV      Out0,   #128                        ;// Out0 = 128 if(count == 0)
276
277        ;// M_STALL ARM1136JS=5
278
279        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
280
281        ;// M_STALL ARM1136JS=1
282
283        MOV      return, #OMX_Sts_NoErr
284        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
285        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
286        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
287        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
288        M_EXIT                                       ;// Macro to exit midway-break from case
289
290OMX_VC_4x4_DIAG_DL
        ;// Diagonal down-left prediction from the above row U0..U7.
        ;// Byte diagrams in the comments list the most significant byte
        ;// first, i.e. the rightmost entry is the lowest address.
        ;//
        ;// The second above word (pSrcAbove[4..7]) is read only when
        ;// OMX_VC_UPPER_RIGHT is set in availability. When it is clear,
        ;// this path substitutes U3 for U4..U7 and never touches memory
        ;// past pSrcAbove[3].
291
292        ;//------------------------------------------------------------------
293        ;// f = (a+2*b+c+2)>>2
294        ;// Calculate as:
295        ;// d = (a + c )>>1
296        ;// e = (d - b')>>1
297        ;// f = e + 128
298        ;//------------------------------------------------------------------
299
300        ;// M_STALL ARM1136JS=3
301
        ;// TST sets the flags; the loads that follow do not change them,
        ;// so LDRNE and BNE both test the OMX_VC_UPPER_RIGHT bit.
302        TST      availability, #OMX_VC_UPPER_RIGHT
303        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
        LDRNE    Above4567,  [pSrcAbove, #4]         ;// Above4567 = pSrcAbove[4 to 7], only if upper-right is available
304        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (r0: pSrcLeft is not used in this mode)
305        BNE      DLUpperRightAvailable
306        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101 (r1: pSrcAbove already consumed)
307        MOV      tVal7,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
308        MOV      tVal11, tVal7,  LSL #24             ;// {U3,  00,  00,  00 }
309        MUL      Out3,   tVal7,  r0x01010101         ;// {U3,  U3,  U3,  U3 }
310        MOV      tVal8,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
311        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
312        MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
313        UHADD8   tVal8,  tVal8,  Above0123           ;// {xx,  xx,  d1,  d0 }
314        UHADD8   tVal6,  Above0123,  tVal9           ;// {xx,  d2,  xx,  xx }  (tVal9 is Out3 = U3 replicated)
315        UHSUB8   tVal8,  tVal8,  tVal10              ;// {xx,  xx,  e1,  e0 }
316        UHSUB8   tVal6,  tVal6,  tVal10              ;// {xx,  e2,  xx,  xx }
317        UADD8    tVal8,  tVal8,  r0x80808080         ;// {xx,  xx,  f1,  f0 }
318        UADD8    tVal6,  tVal6,  r0x80808080         ;// {xx,  f2,  xx,  xx }
319
320        ;// M_STALL ARM1136JS=1
321
322        PKHBT    tVal6,  tVal8,  tVal6               ;// {xx,  f2,  f1,  f0 }
323        BIC      tVal6,  tVal6,  #0xFF000000         ;// {00,  f2,  f1,  f0 }
324        ORR      Out0,   tVal6,  tVal11              ;// {U3,  f2,  f1,  f0 }
325
326        ;// M_STALL ARM1136JS=1
327
328        PKHTB    Out1,   Out3,   Out0,  ASR #8       ;// {U3,  U3,  f2,  f1 }
329        MOV      return, #OMX_Sts_NoErr              ;// r0x80808080 (same register) is dead by now
330        PKHTB    Out2,   Out3,   Out1,  ASR #8       ;// {U3,  U3,  U3,  f2 }
331
332        M_STR    Out0,   [pDst], dstStep             ;// store {U3, f2, f1, f0} at pDst[3  to 0 ]
333        M_STR    Out1,   [pDst], dstStep             ;// store {U3, U3, f2, f1} at pDst[7  to 4 ]
334        M_STR    Out2,   [pDst], dstStep             ;// store {U3, U3, U3, f2} at pDst[11 to 8 ]
335        STR      Out3,   [pDst]                      ;// store {U3, U3, U3, U3} at pDst[15 to 12]
336        M_EXIT                                       ;// Macro to exit midway-break from case
337
337DLUpperRightAvailable
        ;// Diagonal down-left prediction when OMX_VC_UPPER_RIGHT is set.
        ;// Expected on entry (set up by the OMX_VC_4x4_DIAG_DL code before
        ;// its BNE): Above0123 = U3 U2 U1 U0, Above4567 = U7 U6 U5 U4,
        ;// r0x80808080 = 0x80808080.
        ;// Each f value is produced by the UHADD8 / UHSUB8-with-complement
        ;// / UADD8 sequence described at OMX_VC_4x4_DIAG_DL. Output rows are
        ;// {f3..f0}, {f4..f1}, {f5..f2}, {f6..f3} (most significant byte
        ;// first).
338
339        MOV      tVal8,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
340        MOV      tVal9,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
341        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
342        ORR      tVal8,  tVal8,  Above4567, LSL #8   ;// {U6,  U5,  U4,  U3 }
343        ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
344        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// {U5,  U4,  U3,  U2 }
345        MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'}
346        MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
347        MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
348        UHADD8   tVal6,  Above0123,  tVal9           ;// {d3,  d2,  d1,  d0 }
349        UHADD8   tVal9,  tVal9,  Above4567           ;// {d5,  d4,  d3,  d2 }
350        UHADD8   tVal8,  Above4567,  tVal8           ;// {d6,  xx,  xx,  xx }
351        UHSUB8   tVal6,  tVal6,  tVal10              ;// {e3,  e2,  e1,  e0 }
352        UHSUB8   tVal12, tVal9,  tVal1               ;// {e5,  e4,  e3,  e2 }
353        UHSUB8   tVal8,  tVal8,  tVal2               ;// {e6,  xx,  xx,  xx }
354        UADD8    Out0,   tVal6,  r0x80808080         ;// {f3,  f2,  f1,  f0 }
355        UADD8    tVal9,  tVal8,  r0x80808080         ;// {f6,  xx,  xx,  xx }
356        UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
357        MOV      tVal7,  Out0,   LSR #8              ;// {00,  f3,  f2,  f1 }
358        AND      tVal9,  tVal9,  #0xFF000000         ;// {f6,  00,  00,  00 }
359        PKHBT    Out1,   tVal7,  Out2,  LSL #8       ;// {f4,  f3,  f2,  f1 }
360        ORR      Out3,   tVal9,  Out2,  LSR #8       ;// {f6,  f5,  f4,  f3 }
361        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
362        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
363        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
364        STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDst[15 to 12]
365        MOV      return, #OMX_Sts_NoErr              ;// r0x80808080 (same register) is dead by now
366        M_EXIT                                       ;// Macro to exit midway-break from case
368
369
370OMX_VC_4x4_DIAG_DR
        ;// Diagonal down-right prediction. Reads four left bytes (L0..L3,
        ;// leftStep apart), the above-left byte UL and the above word
        ;// U3 U2 U1 U0. availability is not tested in this mode.
        ;// Byte diagrams list the most significant byte first.
        ;// All outputs come from the three-tap sequence
        ;//   UHADD8 (a, c) -> UHSUB8 with the complement of b -> UADD8 0x80
        ;// applied to two packed words: one yields the bytes for row 0
        ;// (Out0), the other the bytes for row 3 (Out3); rows 1 and 2 are
        ;// assembled from those two words by shifting and packing.
        ;// Left0..Left3 share r6..r9 with tVal6..tVal9 and Out0..Out3, so
        ;// the packing order below must not be changed.
371
372        ;// M_STALL ARM1136JS=4
373
374        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
375        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
376        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
377        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
378        LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
379        ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
380        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
381        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080 (r0: pSrcLeft no longer needed)
382        ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
383        PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
384        MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
385        MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
386        ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL
387        ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2
388        MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00  (AboveLeft, same register, is dead now)
389        UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
390        ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
391        ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
392        UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0
393        UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
394        MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
395        MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
396        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
397        UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
398        UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3
399        UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0
400        UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
401        MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
402        ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
403        PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
404        M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} at pDst[3  to 0 ]
405        M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} at pDst[7  to 4 ]
406        M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} at pDst[11 to 8 ]
407        STR      Out3,   [pDst]                      ;// store {i0 to i3} at pDst[15 to 12]
408        MOV      return,  #OMX_Sts_NoErr
409        M_EXIT                                       ;// Macro to exit midway-break from case
410
411OMX_VC_4x4_VR
        ;// Vertical-right prediction. Reads the above word U3 U2 U1 U0,
        ;// the above-left byte UL and three left bytes L0..L2 (leftStep
        ;// apart). availability is not tested in this mode.
        ;// Byte diagrams list the most significant byte first.
        ;//   e* : two-tap rounded averages  (UHSUB8 with the complement of
        ;//        one operand, then UADD8 0x80)
        ;//   i*, l* : three-tap values (UHADD8, UHSUB8 with the complement
        ;//        of the middle tap, then UADD8 0x80)
        ;// tVal0 is r0 (pSrcLeft) and is reused only after the last left
        ;// byte has been loaded; r0 becomes r0x80808080 once tVal0 is dead.
        ;// Stores are interleaved with the arithmetic, so Out0..Out2 are
        ;// written to memory before their registers are recycled.
412
413        ;// M_STALL ARM1136JS=4
414
415        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
416        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
417        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
418        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
419        LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
420        MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00
421        MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0
422        ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
423        MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
424        MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
425        UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
426        UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
427        MVN      tVal4,  Left0                       ;// tVal4     = FF FF FF L0' (upper bytes are 0xFF after MVN)
        ;// The upper three bytes of the next result are 0x80
        ;// ((0 - 0xFF) halved); the later UADD8 with 0x80808080 wraps them
        ;// back to 0x00, which is why tVal4 can be ORed into Out2 below.
428        UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 80 80 80 k1
429        ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0
430        ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
431        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (tVal0, same register, is dead now)
432        UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
433        UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL
434        UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
435        M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} at pDst[3  to 0 ]
436        MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
437        MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00
438        ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
439        UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1
440        UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
441        MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
442        ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
443        ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
444        UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
445        M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} at pDst[7  to 4 ]
446        M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} at pDst[11 to 8 ]
447        UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
448        UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
449        STR      Out3,   [pDst]                      ;// store {i2 to l2} at pDst[15 to 12]
450        MOV      return,  #OMX_Sts_NoErr
451        M_EXIT                                       ;// Macro to exit midway-break from case
452
452OMX_VC_4x4_HD
        ;// Horizontal-down prediction. Reads the above word U3 U2 U1 U0,
        ;// the above-left byte UL and four left bytes L0..L3 (leftStep
        ;// apart). availability is not tested in this mode.
        ;// Byte diagrams list the most significant byte first.
        ;//   e* : two-tap rounded averages, i* / l* : three-tap values,
        ;//   all produced with UHADD8 / UHSUB8-with-complement / UADD8 0x80.
        ;// Rows 1..3 work on values spread over bytes 0 and 2 of a word
        ;// ("00 x 00 y"); the e and l words are then interleaved with
        ;// "ORR ..., LSL #8". In those spread words the bytes shown as 00
        ;// are 0x80 after UHSUB8 and return to 0x00 after the UADD8 with
        ;// 0x80808080.
453
454        ;// M_STALL ARM1136JS=4
455
456        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
457        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
458        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
459        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
460        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
461        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
462        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (r0: pSrcLeft no longer needed)
463        ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
464        MVN      tVal1,  Left0                       ;// tVal1 = FF FF FF L0' (upper bytes are 0xFF after MVN)
465        ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
466        MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
467        UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
468        UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = 80 80 80 dL
469        UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
470        UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL
471        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
472        ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
473        MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
474        ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
475        ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2
476        ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3
477        ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL
478        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
479        MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i0 eL 00 00
480        UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
481        UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3
482        MVN      tVal12, tVal11                      ;// tVal12= 00'L0'00'L1'
483        MVN      tVal14, tVal7                       ;// tVal14= 00'L1'00'L2'
484        UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = 00 k1 00 k2
485        UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = 00 d1 00 d2
486        UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= 00 k2 00 k3
487        UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = 00 d2 00 d3
488        UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2
489        UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
490        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
491        UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
492        ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
493        ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
494        PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i0 eL l1 e1
495        M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
496        M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
497        STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
498        MOV      return,  #OMX_Sts_NoErr
499        M_EXIT                                       ;// Macro to exit midway-break from case
501
501OMX_VC_4x4_VL
        ;// Vertical-left prediction from the above row U0..U6.
        ;// Byte diagrams list the most significant byte first.
        ;//   e* : two-tap rounded averages  (rows 0 and 2)
        ;//   i* : three-tap values          (rows 1 and 3)
        ;// both produced with UHADD8 / UHSUB8-with-complement / UADD8 0x80.
        ;//
        ;// The second above word (pSrcAbove[4..7]) is read only when
        ;// OMX_VC_UPPER_RIGHT is set in availability. When it is clear,
        ;// Above4567 is built by replicating U3, and no memory past
        ;// pSrcAbove[3] is accessed.
502
503        ;// M_STALL ARM1136JS=3
504
        ;// TST sets the flags; the loads and the MOV below leave them
        ;// untouched, so LDRNE and MULEQ both test OMX_VC_UPPER_RIGHT.
        ;// LDRNE must stay ahead of the load of r0x01010101, which reuses
        ;// the pSrcAbove register (r1).
505        LDR      Above0123, [pSrcAbove]              ;// Above0123 = pSrcAbove[0 to 3]
506        TST      availability, #OMX_VC_UPPER_RIGHT
        LDRNE    Above4567, [pSrcAbove, #4]          ;// Above4567 = pSrcAbove[4 to 7], only if upper-right is available
507        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
508        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
509        MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
510        MULEQ    Above4567, tVal11, r0x01010101      ;// Above4567 = U3 U3 U3 U3 (upper-right not available)
511        MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
512        MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
513        ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
514        UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
515        UADD8    Out0,   tVal8,  r0x80808080         ;// Out0 = e4 e3 e2 e1
516        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
517        MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
518        MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
519        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
520        ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
521        UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
522        UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3
523        MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
524        MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2'
525        UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2
526        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
527        UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
528        UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2
529        UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
530        UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
531        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
532        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
533        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]; last row, pDst not advanced
534        MOV      return, #OMX_Sts_NoErr              ;// r0x80808080 (same register) is dead by now
535        M_EXIT                                       ;// Macro to exit midway-break from case
537
537OMX_VC_4x4_HU
        ;// Horizontal-up prediction. Reads only the four left bytes
        ;// L0..L3 (leftStep apart); pSrcAbove, pSrcAboveLeft and
        ;// availability are not read.
        ;// Byte diagrams list the most significant byte first.
        ;//   e* : two-tap rounded averages, i* : three-tap values, both
        ;//   produced with UHADD8 / UHSUB8-with-complement / UADD8 0x80 on
        ;//   values spread over bytes 0 and 2 of a word ("00 x 00 y").
        ;//   In those words the bytes shown as 00 are 0x80 after UHSUB8 and
        ;//   return to 0x00 after the UADD8 with 0x80808080.
        ;// Row 3 is L3 replicated; row 2 takes its upper half from row 3
        ;// and its lower half from the upper half of row 1.
        ;// tVal4 is r4, so leftStep is destroyed once the left bytes are
        ;// loaded. This is the last handler and it leaves through M_END.
538
539        ;// M_STALL ARM1136JS=2
540
541        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
542        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
543        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
544        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
545        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
546        MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x80808080 (r0: pSrcLeft no longer needed)
547        ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
548        ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
549        ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
550        MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
551        MVN      tVal8,  tVal7                       ;// tVal8 = 00'L2'00'L1'
552        MVN      tVal10, tVal11                      ;// tVal10= 00'L3'00'L2'
553        UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2
554        UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
555        UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = 00 h3 00 h2
556        UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = 00 d2 00 d1
        ;// tVal8 (complement of L2, L1) is the correct operand here:
        ;// it pairs L3 with L2 and L2 with L1 for the two averages.
557        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= 00 d3 00 d2
558        UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
559        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2
560        UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= 00 h4 00 h3
561        UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1
562        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
563        UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3
564        ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1
565        ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
566        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
567        PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
568        M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
569        M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
570        STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
571        MOV      return,  #OMX_Sts_NoErr
572        M_END
573
574        ENDIF ;// ARM1136JS
575
576
577        END
579;//-----------------------------------------------------------------------------------------------
580;// omxVCM4P10_PredictIntra_4x4 ends
581;//-----------------------------------------------------------------------------------------------
582