omxVCM4P10_PredictIntra_4x4_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
1;//
2;//
3;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   9641
6;// Date:       Thursday, February 7, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13
14        INCLUDE omxtypes_s.h
15        INCLUDE armCOMM_s.h
16
17;// Define the processor variants supported by this file
18
19         M_VARIANTS ARM1136JS
20
21;//-------------------------------------------------------
22;// This table for implementing switch case of C in asm by
23;// the method of two levels of indexing.
24;//-------------------------------------------------------
25
;// Jump table used by omxVCM4P10_PredictIntra_4x4: word entry i holds the
;// address of the code label that handles predMode == i. The function
;// indexes it with predMode (LSL #2) and loads the entry straight into pc,
;// so the order of the entries below is part of the dispatch contract.
;// M_TABLE is a macro defined outside this file (presumably armCOMM_s.h).
26    M_TABLE armVCM4P10_pSwitchTable4x4
27    DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR       ;// entries 0, 1
28    DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL   ;// entries 2, 3
29    DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR        ;// entries 4, 5
30    DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL        ;// entries 6, 7
31    DCD  OMX_VC_4x4_HU                             ;// entry  8 (last)
32
33    IF ARM1136JS
34
35;//--------------------------------------------
36;// Constants
37;//--------------------------------------------
;// BLK_SIZE is not referenced anywhere in the visible part of this file.
38BLK_SIZE              EQU 0x8
39MUL_CONST0            EQU 0x01010101         ;// x * MUL_CONST0 copies a byte value x (0..255) into all four byte lanes
40ADD_CONST1            EQU 0x80808080         ;// 128 in every byte lane; added with UADD8 after the UHSUB8 steps
41
42;//--------------------------------------------
43;// Scratch variable
44;//--------------------------------------------
;// Many of the names below are aliases of the same physical register.
;// A name is only meaningful between the instruction that writes it and
;// its last read; check the register number before reordering any code.
45return          RN 0    ;// status value handed back to the caller
46pTable          RN 9    ;// switch-table address; only live until the dispatch (same register as Out3/Left3/tVal9)
47pc              RN 15
48r0x01010101     RN 1    ;// holds MUL_CONST0; same register as pSrcAbove and tVal1
49r0x80808080     RN 0    ;// holds ADD_CONST1; same register as pSrcLeft, return and tVal0
50
;// General temporaries, named after their register number.
51tVal0           RN 0
52tVal1           RN 1
53tVal2           RN 2
54tVal4           RN 4
55tVal6           RN 6
56tVal7           RN 7
57tVal8           RN 8
58tVal9           RN 9
59tVal10          RN 10
60tVal11          RN 11
61tVal12          RN 12
62tVal14          RN 14
63
;// The four 32-bit output rows (r6-r9).
64Out0            RN 6
65Out1            RN 7
66Out2            RN 8
67Out3            RN 9
68
;// Bytes loaded from pSrcLeft; they share r6-r9 with Out0-Out3 and tVal6-tVal9.
69Left0           RN 6
70Left1           RN 7
71Left2           RN 8
72Left3           RN 9
73
;// Packed words loaded from pSrcAbove (byte 0 of the word = lowest address).
74Above0123       RN 12   ;// same register as tVal12
75Above4567       RN 14   ;// same register as tVal14
76
77AboveLeft       RN 10   ;// byte loaded from pSrcAboveLeft; same register as tVal10
78
79;//--------------------------------------------
80;// Declare input registers
81;//--------------------------------------------
;// r0-r3 carry the first four arguments on entry. The remaining four are
;// read from the stack by the M_LDR instructions at the top of the function.
;// predMode and availability share r6/r7 with Left0/Left1, Out0/Out1 and
;// tVal6/tVal7, so they are lost as soon as a case writes those names.
82pSrcLeft        RN 0    ;// input pointer
83pSrcAbove       RN 1    ;// input pointer
84pSrcAboveLeft   RN 2    ;// input pointer
85pDst            RN 3    ;// output pointer
86leftStep        RN 4    ;// input variable
87dstStep         RN 5    ;// input variable
88predMode        RN 6    ;// input variable
89availability    RN 7    ;// input variable
90
91;//-----------------------------------------------------------------------------------------------
92;// omxVCM4P10_PredictIntra_4x4 starts
93;//-----------------------------------------------------------------------------------------------
94
95        ;// Write function header
;// omxVCM4P10_PredictIntra_4x4
;//
;// Writes a 4x4 block of predicted bytes to pDst as four 32-bit rows that
;// are dstStep bytes apart. The rule used to build the rows is selected by
;// predMode; the inputs are the bytes addressed by pSrcLeft (leftStep bytes
;// apart), pSrcAbove and pSrcAboveLeft.
;//
;// In:   r0 = pSrcLeft, r1 = pSrcAbove, r2 = pSrcAboveLeft, r3 = pDst
;//       stack: leftStep, dstStep, predMode, availability (4 bytes each)
;// Out:  r0 = OMX_Sts_NoErr on every path in this file
;//
;// NOTE(review): M_START, M_ARG and M_LDR are macros from an included
;//   header. The "r11" argument presumably asks M_START to preserve r4-r11
;//   (and lr, since r14 is used as scratch by several cases) - confirm in
;//   armCOMM_s.h.
;// NOTE(review): predMode is used as a table index without a range check in
;//   this file. The table has 9 entries, so a value outside 0..8 would load
;//   pc from whatever follows the table - callers must guarantee the range.
96        M_START omxVCM4P10_PredictIntra_4x4, r11
97
98        ;// Define stack arguments
99        M_ARG    LeftStep,     4
100        M_ARG    DstStep,      4
101        M_ARG    PredMode,     4
102        M_ARG    Availability, 4
103
104        ;// M_STALL ARM1136JS=4
105
106        LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case
107
108        ;// Load argument from the stack
109        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
110        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
111        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
112        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
113
114        LDR      pc, [pTable, predMode, LSL #2]      ;// Jump to table entry number predMode (4 bytes per entry)
115
;// Table entry 0: every output row is a copy of the four bytes at pSrcAbove.
116OMX_VC_4x4_VERT
117
118        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
119        M_STR    Above0123,  [pDst],  dstStep        ;// row 0 = Above0123, pDst post-indexed by dstStep
120        M_STR    Above0123,  [pDst],  dstStep        ;// row 1 = Above0123
121        M_STR    Above0123,  [pDst],  dstStep        ;// row 2 = Above0123
122        STR      Above0123,  [pDst]                  ;// row 3 = Above0123 (no further pointer update needed)
123        MOV      return, #OMX_Sts_NoErr
124        M_EXIT                                      ;// Macro to exit midway - break from the case
125
;// Table entry 1: output row y is the left byte y repeated four times.
;// The four left bytes are read leftStep bytes apart, starting at pSrcLeft.
126OMX_VC_4x4_HOR
127
128        ;// M_STALL ARM1136JS=6
129
130        LDR      r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times (r1: pSrcAbove is not used by this case)
131        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = first left byte, pSrcLeft post-indexed by leftStep
132        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = second left byte
133        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = third left byte
134        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = fourth left byte
135        MUL      Out0,   Left0,   r0x01010101        ;// replicate the val in all the bytes (Out0 and Left0 are both r6)
136        MUL      Out1,   Left1,   r0x01010101        ;// replicate the val in all the bytes
137        MUL      Out2,   Left2,   r0x01010101        ;// replicate the val in all the bytes
138        MUL      Out3,   Left3,   r0x01010101        ;// replicate the val in all the bytes
139        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 0
140        M_STR    Out1,   [pDst],  dstStep            ;// store {Out1} as row 1
141        M_STR    Out2,   [pDst],  dstStep            ;// store {Out2} as row 2
142        STR      Out3,   [pDst]                      ;// store {Out3} as row 3
143        MOV      return, #OMX_Sts_NoErr
144        M_EXIT                                       ;// Macro to exit midway - break from the case
145
;// Table entry 2 (DC prediction).
;// Only the OMX_VC_UPPER and OMX_VC_LEFT bits of availability are examined.
;// The code that follows the BNE handles "both bits set": every output byte is
;//     (pSrcAbove[0]+..+pSrcAbove[3] + Left0+..+Left3 + 4) >> 3
;// Any other combination branches to UpperOrLeftOrNoneAvailable with
;// availability already masked.
146OMX_VC_4x4_DC
147
148        ;// M_STALL ARM1136JS=6
149
150        AND      availability,  availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
151        CMP      availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
152        BNE      UpperOrLeftOrNoneAvailable          ;// Jump to Upper if not both
153        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
154
155        ;// M_STALL ARM1136JS=1
156
157        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2] (r7: availability is no longer needed on this path)
158        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
159        UADD16   tVal11, tVal6,   tVal7              ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
160        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = first left byte
161        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = second left byte
162        ADD      tVal11, tVal11,  LSR #16            ;// low half = sum(pSrcAbove[0] to pSrcAbove[3])
163        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = third left byte
164        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = fourth left byte
165        UXTH     tVal11, tVal11                      ;// upsum1 (Clear the top junk bits)
166        ADD      tVal6,  Left0,  Left1               ;// tVal6 = Left0 + Left1
167        ADD      tVal7,  Left2,  Left3               ;// tVal7 = Left2 + Left3
168        ADD      tVal6,  tVal6,  tVal7               ;// tVal6 = tVal6 + tVal7
169        ADD      Out0,   tVal6,  tVal11              ;// Out0  = tVal6 + tVal11
170        ADD      Out0,   Out0,   #4                  ;// Out0  = Out0 + 4
171        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
172        MOV      Out0,   Out0,  LSR #3               ;// Out0 = (Out0 + 4)>>3
173
174        ;// M_STALL ARM1136JS=1
175
176        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
177
178        ;// M_STALL ARM1136JS=1
179
180        MOV      return,  #OMX_Sts_NoErr             ;// r0 is free: pSrcLeft was last used by the loads above
181        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 0
182        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 1
183        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 2
184        STR      Out0,   [pDst]                      ;// store {Out0} as row 3
185        M_EXIT                                       ;// Macro to exit midway - break from the case
186
;// Reached from OMX_VC_4x4_DC with availability already masked to the
;// OMX_VC_UPPER and OMX_VC_LEFT bits and not equal to both of them.
;// If the masked value equals OMX_VC_UPPER, every output byte is
;//     (pSrcAbove[0]+..+pSrcAbove[3] + 2) >> 2
;// otherwise control continues at LeftOrNoneAvailable.
187UpperOrLeftOrNoneAvailable
188        ;// M_STALL ARM1136JS=3
189
190        CMP      availability,  #OMX_VC_UPPER        ;// is only the upper neighbour flagged?
191        BNE      LeftOrNoneAvailable                 ;// Jump to Left if not upper
192        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
193
194        ;// M_STALL ARM1136JS=3
195
196        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
197        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
198        UADD16   Out0,   tVal6,  tVal7               ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
199        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (r1: pSrcAbove has already been read)
200        ADD      Out0,   Out0,   LSR #16             ;// low half = sum(pSrcAbove[0] to pSrcAbove[3])
201
202        ;// M_STALL ARM1136JS=1
203
204        UXTH     Out0,   Out0                        ;// upsum1 (Clear the top junk bits)
205        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
206
207        ;// M_STALL ARM1136JS=1
208
209        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
210
211        ;// M_STALL ARM1136JS=1
212
213        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
214
215        ;// M_STALL ARM1136JS=1
216
217        MOV      return, #OMX_Sts_NoErr
218        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 0
219        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 1
220        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 2
221        STR      Out0,   [pDst]                      ;// store {Out0} as row 3
222
223        M_EXIT                                       ;// Macro to exit midway - break from the case
224
;// Reached from UpperOrLeftOrNoneAvailable (availability is still the masked
;// value). If it equals OMX_VC_LEFT, every output byte is
;//     (Left0 + Left1 + Left2 + Left3 + 2) >> 2
;// otherwise control continues at NoneAvailable.
;// r0x01010101 is loaded before the test because NoneAvailable uses it too.
225LeftOrNoneAvailable
226        ;// M_STALL ARM1136JS=3
227
228        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (needed by both outcomes of the test below)
229        CMP      availability, #OMX_VC_LEFT
230        BNE      NoneAvailable
231        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = first left byte
232        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = second left byte
233        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = third left byte
234        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = fourth left byte
235        ADD      Out0,   Left0,  Left1               ;// Out0  = Left0 + Left1
236
237        ;// M_STALL ARM1136JS=1
238
239        ADD      Out1,   Left2,  Left3               ;// Out1  = Left2 + Left3 (Out1 is only a temporary here)
240        ADD      Out0,   Out0,   Out1                ;// Out0  = Out0  + Out1
241        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
242
243        ;// M_STALL ARM1136JS=1
244
245        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
246
247        ;// M_STALL ARM1136JS=1
248
249        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
250
251        ;// M_STALL ARM1136JS=1
252
253        MOV      return, #OMX_Sts_NoErr
254        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 0
255        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 1
256        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 2
257        STR      Out0,   [pDst]                      ;// store {Out0} as row 3
258        M_EXIT                                       ;// Macro to exit midway - break from the case
259
;// Reached from LeftOrNoneAvailable when neither OMX_VC_UPPER nor OMX_VC_LEFT
;// is set in the masked availability: every output byte is 128.
;// Relies on r0x01010101 having been loaded at LeftOrNoneAvailable, which is
;// the only branch to this label in the code shown.
260NoneAvailable
261        MOV      Out0,   #128                        ;// Out0 = 128 when no neighbour is flagged
262
263        ;// M_STALL ARM1136JS=5
264
265        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
266
267        ;// M_STALL ARM1136JS=1
268
269        MOV      return, #OMX_Sts_NoErr
270        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 0
271        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 1
272        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} as row 2
273        STR      Out0,   [pDst]                      ;// store {Out0} as row 3
274        M_EXIT                                       ;// Macro to exit midway - break from the case
275
;// Table entry 3 (diagonal down-left).
;// With U0..U7 the bytes at pSrcAbove and
;//     f[i] = (U[i] + 2*U[i+1] + U[i+2] + 2) >> 2
;// output row y holds f[y], f[y+1], f[y+2], f[y+3] (lowest address first).
;// The code that follows the BNE handles OMX_VC_UPPER_RIGHT clear, where U3
;// is used in place of U4..U7, so f[3] and above are simply U3.
;// With the bit set, control goes to DLUpperRightAvailable.
;// Note: the LDMIA reads eight bytes from pSrcAbove on both paths.
276OMX_VC_4x4_DIAG_DL
277
278        ;//------------------------------------------------------------------
279        ;// f = (a+2*b+c+2)>>2
280        ;// Calculate as:
281        ;// d = (a + c )>>1
282        ;// e = (d - b')>>1          (b' is the bitwise complement of b, made with MVN)
283        ;// f = e + 128
284        ;//------------------------------------------------------------------
285
286        ;// M_STALL ARM1136JS=3
287
288        TST      availability, #OMX_VC_UPPER_RIGHT
289        LDMIA    pSrcAbove,  {Above0123, Above4567}  ;// Above0123, Above4567 = pSrcAbove[0 to 7]
290        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
291        BNE      DLUpperRightAvailable               ;// flags are still those of the TST above
292        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101 (r1: pSrcAbove has already been read)
293        MOV      tVal7,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
294        MOV      tVal11, tVal7,  LSL #24             ;// {U3,  00,  00,  00 }
295        MUL      Out3,   tVal7,  r0x01010101         ;// {U3,  U3,  U3,  U3 }
296        MOV      tVal8,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
297        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
298        MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
299        UHADD8   tVal8,  tVal8,  Above0123           ;// {xx,  xx,  d1,  d0 }
300        UHADD8   tVal6,  Above0123,  tVal9           ;// {xx,  d2,  xx,  xx }  tVal9 is r9 = Out3 = {U3,U3,U3,U3}
301        UHSUB8   tVal8,  tVal8,  tVal10              ;// {xx,  xx,  e1,  e0 }
302        UHSUB8   tVal6,  tVal6,  tVal10              ;// {xx,  e2,  xx,  xx }
303        UADD8    tVal8,  tVal8,  r0x80808080         ;// {xx,  xx,  f1,  f0 }
304        UADD8    tVal6,  tVal6,  r0x80808080         ;// {xx,  f2,  xx,  xx }
305
306        ;// M_STALL ARM1136JS=1
307
308        PKHBT    tVal6,  tVal8,  tVal6               ;// {xx,  f2,  f1,  f0 }
309        BIC      tVal6,  tVal6,  #0xFF000000         ;// {00,  f2,  f1,  f0 }
310        ORR      Out0,   tVal6,  tVal11              ;// {U3,  f2,  f1,  f0 }
311
312        ;// M_STALL ARM1136JS=1
313
314        PKHTB    Out1,   Out3,   Out0,  ASR #8       ;// {U3,  U3,  f2,  f1 }
315        MOV      return, #OMX_Sts_NoErr              ;// r0 is free: 0x80808080 was last used by the UADD8s above
316        PKHTB    Out2,   Out3,   Out1,  ASR #8       ;// {U3,  U3,  U3,  f2 }
317
318        M_STR    Out0,   [pDst], dstStep             ;// store {U3, f2, f1, f0} as row 0
319        M_STR    Out1,   [pDst], dstStep             ;// store {U3, U3, f2, f1} as row 1
320        M_STR    Out2,   [pDst], dstStep             ;// store {U3, U3, U3, f2} as row 2
321        STR      Out3,   [pDst]                      ;// store {U3, U3, U3, U3} as row 3
322        M_EXIT                                       ;// Macro to exit midway - break from the case
323
;// Diagonal down-left with OMX_VC_UPPER_RIGHT set. Reached from
;// OMX_VC_4x4_DIAG_DL with Above0123 = U3..U0, Above4567 = U7..U4 and
;// r0x80808080 already loaded. Computes, per byte lane,
;//     f[i] = (U[i] + 2*U[i+1] + U[i+2] + 2) >> 2   for i = 0..5
;//     f[6] = (U6 + 3*U7 + 2) >> 2
;// as d = (a+c)>>1, e = (d - b')>>1, f = e + 128, and stores
;// row y = f[y], f[y+1], f[y+2], f[y+3] (lowest address first).
324DLUpperRightAvailable
325
326        MOV      tVal8,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
327        MOV      tVal9,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
328        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
329        ORR      tVal8,  tVal8,  Above4567, LSL #8   ;// {U6,  U5,  U4,  U3 }
330        ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
331        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// {U5,  U4,  U3,  U2 }
332        MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'}  (r1: pSrcAbove has already been read)
333        MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
334        MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
335        UHADD8   tVal6,  Above0123,  tVal9           ;// {d3,  d2,  d1,  d0 }
336        UHADD8   tVal9,  tVal9,  Above4567           ;// {d5,  d4,  d3,  d2 }
337        UHADD8   tVal8,  Above4567,  tVal8           ;// {d6,  xx,  xx,  xx }  d6 = (U7 + U6)>>1
338        UHSUB8   tVal6,  tVal6,  tVal10              ;// {e3,  e2,  e1,  e0 }
339        UHSUB8   tVal12, tVal9,  tVal1               ;// {e5,  e4,  e3,  e2 }
340        UHSUB8   tVal8,  tVal8,  tVal2               ;// {e6,  xx,  xx,  xx }
341        UADD8    Out0,   tVal6,  r0x80808080         ;// {f3,  f2,  f1,  f0 }
342        UADD8    tVal9,  tVal8,  r0x80808080         ;// {f6,  xx,  xx,  xx }
343        UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
344        MOV      tVal7,  Out0,   LSR #8              ;// {00,  f3,  f2,  f1 }
345        AND      tVal9,  tVal9,  #0xFF000000         ;// {f6,  00,  00,  00 }
346        PKHBT    Out1,   tVal7,  Out2,  LSL #8       ;// {f4,  f3,  f2,  f1 }
347        ORR      Out3,   tVal9,  Out2,  LSR #8       ;// {f6,  f5,  f4,  f3 }
348        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} as row 0
349        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} as row 1
350        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} as row 2
351        STR      Out3,   [pDst]                      ;// store {f6 to f3} as row 3
352        MOV      return, #OMX_Sts_NoErr
353        M_EXIT                                       ;// Macro to exit midway - break from the case
354
355
;// Table entry 4 (diagonal down-right).
;// Inputs: four left bytes L0..L3, the above-left byte UL and the four above
;// bytes U0..U3. Treating L3,L2,L1,L0,UL,U0,U1,U2,U3 as one line of
;// neighbours, every output byte is (a + 2*b + c + 2) >> 2 of three
;// consecutive neighbours, computed per byte lane as
;//     d = (a + c)>>1,  e = (d - b')>>1,  f = e + 128.
;// Row 3 (Out3) is built from the left side, row 0 (Out0) from the above
;// side, and rows 1 and 2 are assembled from bytes of those two results.
;// availability is not examined by this case.
;// In the lane diagrams the leftmost name is the most significant byte.
356OMX_VC_4x4_DIAG_DR
357
358        ;// M_STALL ARM1136JS=4
359
360        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = first left byte
361        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = second left byte
362        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = third left byte
363        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = fourth left byte
364        LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
365        ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
366        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
367        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080 (r0: pSrcLeft is no longer needed)
368        ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
369        PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
370        MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
371        MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
372        ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL
373        ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2
374        MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00 (r10: AboveLeft is no longer needed)
375        UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
376        ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
377        ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
378        UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0
379        UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
380        MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
381        MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
382        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
383        UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
384        UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3
385        UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0
386        UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
387        MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
388        ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
389        PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
390        M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} as row 0
391        M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} as row 1
392        M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} as row 2
393        STR      Out3,   [pDst]                      ;// store {i0 to i3} as row 3
394        MOV      return,  #OMX_Sts_NoErr
395        M_EXIT                                       ;// Macro to exit midway - break from the case
396
;// Table entry 5 (vertical-right).
;// Inputs: above bytes U0..U3, above-left byte UL and the first three left
;// bytes L0..L2 (the fourth left byte is not read by this case).
;// Two kinds of value are produced per byte lane:
;//   2-tap: (a + b + 1) >> 1           via UHSUB8 with b' then + 128
;//   3-tap: (a + 2*b + c + 2) >> 2     via UHADD8, UHSUB8 with b' then + 128
;// Row 0 holds 2-tap values of the above bytes, row 1 holds 3-tap values,
;// row 2 is row 0 shifted up one lane with a left-side 3-tap value in lane 0,
;// row 3 is the matching shift of row 1 with a left-side 3-tap in lane 0.
;// availability is not examined by this case.
;// In the lane diagrams the leftmost name is the most significant byte.
397OMX_VC_4x4_VR
398
399        ;// M_STALL ARM1136JS=4
400
401        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
402        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
403        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
404        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
405        LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
406        MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00 (r0: pSrcLeft is no longer needed)
407        MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0 (copy: r12 is reused as tVal12 below)
408        ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
409        MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
410        MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
411        UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
412        UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
413        MVN      tVal4,  Left0                       ;// tVal4     = 00 00 00 L0' (r4: leftStep is no longer needed)
414        UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 00 00 00 k1
415        ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0
416        ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
417        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (r0: tVal0 was last read two lines above)
418        UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
419        UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL
420        UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
421        M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} as row 0
422        MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
423        MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00 (r6 = Out0, already stored)
424        ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
425        UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1
426        UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
427        MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
428        ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
429        ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
430        UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
431        M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} as row 1
432        M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} as row 2
433        UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
434        UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
435        STR      Out3,   [pDst]                      ;// store {i2 to l2} as row 3
436        MOV      return,  #OMX_Sts_NoErr
437        M_EXIT                                       ;// Macro to exit midway - break from the case
438
;// Table entry 6 (horizontal-down).
;// Inputs: above bytes U0..U3 (U3 only feeds a lane that is shifted out),
;// above-left byte UL and the four left bytes L0..L3.
;// Two kinds of value are produced per byte lane:
;//   2-tap "e": (a + b + 1) >> 1         via UHSUB8 with b' then + 128
;//   3-tap "i"/"l": (a + 2*b + c + 2) >> 2  via UHADD8, UHSUB8 with b', + 128
;// Row 0 = {i2 i1 i0 eL}; rows 2 and 3 interleave left-side 3-tap and 2-tap
;// values; row 1 joins the low half of row 0 with the high half of row 2.
;// The left-side values are computed in lanes 0 and 2 only. Lanes 1 and 3 of
;// those temporaries hold 00 going in and come out as 00 again after the
;// MVN / UHSUB8 / UADD8 sequence, which is what allows the ORR ... LSL #8
;// interleave at the end.
;// availability is not examined by this case.
439OMX_VC_4x4_HD
440
441        ;// M_STALL ARM1136JS=4
442
443        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
444        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
445        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
446        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
447        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
448        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
449        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (r0: pSrcLeft is no longer needed)
450        ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
451        MVN      tVal1,  Left0                       ;// tVal1 = FF FF FF L0' (upper lanes end up 00 after the UADD8 below)
452        ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
453        MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
454        UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
455        UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = xx xx xx dL
456        UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
457        UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL
458        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
459        ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
460        MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
461        ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
462        ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2 (r7 = Left1 is consumed here)
463        ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3
464        ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL
465        M_STR    Out0,   [pDst], dstStep             ;// store {Out0} as row 0
466        MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i0 eL 00 00
467        UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
468        UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3 (r6 = Out0, already stored)
469        MVN      tVal12, tVal11                      ;// tVal12= FF L0'FF L1'
470        MVN      tVal14, tVal7                       ;// tVal14= FF L1'FF L2'
471        UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = xx k1 xx k2
472        UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = xx d1 xx d2
473        UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= xx k2 xx k3
474        UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = xx d2 xx d3
475        UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2
476        UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
477        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
478        UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
479        ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
480        ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
481        PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i0 eL l1 e1
482        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} as row 1
483        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} as row 2
484        STR      Out3,   [pDst]                      ;// store {Out3} as row 3
485        MOV      return,  #OMX_Sts_NoErr
486        M_EXIT                                       ;// Macro to exit midway - break from the case
487
;// Table entry 7 (vertical-left).
;// Inputs: eight above bytes U0..U7. When OMX_VC_UPPER_RIGHT is clear in
;// availability, U4..U7 are replaced by U3 (the MULEQ below).
;// Note: the LDMIA reads eight bytes from pSrcAbove in either case.
;// Per byte lane x = 0..3:
;//   row 0: (U[x]   + U[x+1] + 1) >> 1
;//   row 1: (U[x]   + 2*U[x+1] + U[x+2] + 2) >> 2
;//   row 2: (U[x+1] + U[x+2] + 1) >> 1
;//   row 3: (U[x+1] + 2*U[x+2] + U[x+3] + 2) >> 2
;// using UHADD8 / UHSUB8 with a complemented operand and a final + 128.
;// In the lane diagrams the leftmost name is the most significant byte.
488OMX_VC_4x4_VL
489
490        ;// M_STALL ARM1136JS=3
491
492        LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
493        TST      availability, #OMX_VC_UPPER_RIGHT   ;// flags are consumed by the MULEQ below; nothing in between sets them
494        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
495        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101 (r1: pSrcAbove has already been read)
496        MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
497        MULEQ    Above4567, tVal11, r0x01010101      ;// Above4567 = U3 U3 U3 U3 (only when upper-right is not flagged)
498        MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
499        MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
500        ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
501        UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
502        UADD8    Out0,   tVal8,  r0x80808080         ;// Out0 = e4 e3 e2 e1
503        M_STR    Out0,   [pDst], dstStep             ;// store {Out0} as row 0
504        MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
505        MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
506        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
507        ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
508        UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
509        UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3 (r14: Above4567 is no longer needed)
510        MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
511        MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2' (r7: availability is no longer needed)
512        UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2 (r12: Above0123 is no longer needed)
513        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
514        UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
515        UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2
516        UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
517        UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
518        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} as row 1
519        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} as row 2
520        M_STR    Out3,   [pDst], dstStep             ;// store {Out3} as row 3; the post-index is not needed (pDst is unused afterwards) - the other cases use a plain STR here
521        MOV      return, #OMX_Sts_NoErr
522        M_EXIT                                       ;// Macro to exit midway - break from the case
523
;// Table entry 8 (horizontal-up), the last case: it runs into M_END, so no
;// M_EXIT is needed.
;// Inputs: only the four left bytes L0..L3; availability is not examined.
;// Two kinds of value are produced in byte lanes 0 and 2 of the temporaries:
;//   2-tap "e": (a + b + 1) >> 1            via UHSUB8 with b' then + 128
;//   3-tap "i": (a + 2*b + c + 2) >> 2      via UHADD8, UHSUB8 with b', + 128
;// where L3 stands in for neighbours past the fourth left byte (tVal12).
;// Lanes 1 and 3 of those temporaries come out as 00 after the final UADD8,
;// so the "i" words can be shifted left by 8 and ORed with the "e" words.
;//   row 0 = {i3 e2 i2 e1}, row 1 = {i4 e3 i3 e2},
;//   row 2 = {L3 L3 i4 e3}, row 3 = {L3 L3 L3 L3}
;// In the lane diagrams the leftmost name is the most significant byte.
524OMX_VC_4x4_HU
525
526        ;// M_STALL ARM1136JS=2
527
528        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101 (r1: pSrcAbove is not used by this case)
529        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = first left byte
530        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = second left byte
531        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = third left byte
532        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = fourth left byte
533        MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x80808080 derived from 0x01010101 (r0: pSrcLeft is no longer needed)
534        ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
535        ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
536        ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
537        MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
538        MVN      tVal8,  tVal7                       ;// tVal8 = FF L2'FF L1' (r8: Left2 is no longer needed)
539        MVN      tVal10, tVal11                      ;// tVal10= FF L3'FF L2'
540        UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2
541        UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
542        UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = xx h3 xx h2
543        UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = xx d2 xx d1
544        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= xx d3 xx d2
545        UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
546        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2
547        UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= xx h4 xx h3
548        UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1
549        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
550        UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3
551        ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1
552        ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
553        M_STR    Out0,   [pDst], dstStep             ;// store {Out0} as row 0
554        PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
555        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} as row 1
556        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} as row 2
557        STR      Out3,   [pDst]                      ;// store {Out3} as row 3
558        MOV      return,  #OMX_Sts_NoErr
559        M_END
560
561        ENDIF ;// ARM1136JS
562
563
564        END
565;//-----------------------------------------------------------------------------------------------
566;// omxVCM4P10_PredictIntra_4x4 ends
567;//-----------------------------------------------------------------------------------------------
568