10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
40c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
50c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
120c1bc742181ded4930842b46e9507372f0b1b963James Dong
130c1bc742181ded4930842b46e9507372f0b1b963James Dong
140c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
150c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
160c1bc742181ded4930842b46e9507372f0b1b963James Dong
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Define the processor variants supported by this file
180c1bc742181ded4930842b46e9507372f0b1b963James Dong
190c1bc742181ded4930842b46e9507372f0b1b963James Dong         M_VARIANTS ARM1136JS
200c1bc742181ded4930842b46e9507372f0b1b963James Dong
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Jump table that implements a C-style switch/case in asm by
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the method of two levels of indexing: predMode selects a word
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;// of this table, and that word is the case label branched to.
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
260c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_pSwitchTable4x4
270c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR         ;// entries 0, 1
280c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL     ;// entries 2, 3
290c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR          ;// entries 4, 5
300c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL          ;// entries 6, 7
310c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_4x4_HU                               ;// entry 8 (last entry)
320c1bc742181ded4930842b46e9507372f0b1b963James Dong
330c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
340c1bc742181ded4930842b46e9507372f0b1b963James Dong
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Constants
370c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
380c1bc742181ded4930842b46e9507372f0b1b963James DongBLK_SIZE              EQU 0x8          ;// NOTE(review): not referenced by the VERT, HOR or DC cases - confirm it is still needed
390c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST0            EQU 0x01010101   ;// (byte value) * MUL_CONST0 replicates that byte into all four bytes of a word
400c1bc742181ded4930842b46e9507372f0b1b963James DongADD_CONST1            EQU 0x80808080   ;// 128 in every byte; added per byte with UADD8 for the "f = e + 128" step
410c1bc742181ded4930842b46e9507372f0b1b963James Dong
420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
430c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Scratch variables. Many names alias the same physical register,
440c1bc742181ded4930842b46e9507372f0b1b963James Dong;// so writing one name destroys every other alias of that register.
450c1bc742181ded4930842b46e9507372f0b1b963James Dongreturn          RN 0    ;// r0: loaded with OMX_Sts_NoErr before each exit; same register as pSrcLeft
460c1bc742181ded4930842b46e9507372f0b1b963James DongpTable          RN 9    ;// r9: base address of armVCM4P10_pSwitchTable4x4; r9 is reused as tVal9/Out3/Left3
470c1bc742181ded4930842b46e9507372f0b1b963James Dongpc              RN 15   ;// r15: program counter, written to perform the switch branch
480c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x01010101     RN 1    ;// r1: holds MUL_CONST0; same register as pSrcAbove
490c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x80808080     RN 0    ;// r0: holds ADD_CONST1; same register as pSrcLeft and return
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Generic temporaries: tValN is simply rN
510c1bc742181ded4930842b46e9507372f0b1b963James DongtVal0           RN 0
520c1bc742181ded4930842b46e9507372f0b1b963James DongtVal1           RN 1
530c1bc742181ded4930842b46e9507372f0b1b963James DongtVal2           RN 2
540c1bc742181ded4930842b46e9507372f0b1b963James DongtVal4           RN 4
550c1bc742181ded4930842b46e9507372f0b1b963James DongtVal6           RN 6
560c1bc742181ded4930842b46e9507372f0b1b963James DongtVal7           RN 7
570c1bc742181ded4930842b46e9507372f0b1b963James DongtVal8           RN 8
580c1bc742181ded4930842b46e9507372f0b1b963James DongtVal9           RN 9
590c1bc742181ded4930842b46e9507372f0b1b963James DongtVal10          RN 10
600c1bc742181ded4930842b46e9507372f0b1b963James DongtVal11          RN 11
610c1bc742181ded4930842b46e9507372f0b1b963James DongtVal12          RN 12
620c1bc742181ded4930842b46e9507372f0b1b963James DongtVal14          RN 14
630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Output rows 0..3 of the 4x4 block, one packed 32-bit word per row (r6..r9)
640c1bc742181ded4930842b46e9507372f0b1b963James DongOut0            RN 6
650c1bc742181ded4930842b46e9507372f0b1b963James DongOut1            RN 7
660c1bc742181ded4930842b46e9507372f0b1b963James DongOut2            RN 8
670c1bc742181ded4930842b46e9507372f0b1b963James DongOut3            RN 9
680c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Left-neighbour bytes 0..3; these share r6..r9 with Out0..Out3
690c1bc742181ded4930842b46e9507372f0b1b963James DongLeft0           RN 6
700c1bc742181ded4930842b46e9507372f0b1b963James DongLeft1           RN 7
710c1bc742181ded4930842b46e9507372f0b1b963James DongLeft2           RN 8
720c1bc742181ded4930842b46e9507372f0b1b963James DongLeft3           RN 9
730c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Packed bytes loaded from pSrcAbove: [0 to 3] and [4 to 7]
740c1bc742181ded4930842b46e9507372f0b1b963James DongAbove0123       RN 12
750c1bc742181ded4930842b46e9507372f0b1b963James DongAbove4567       RN 14
760c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Shares r10 with tVal10
770c1bc742181ded4930842b46e9507372f0b1b963James DongAboveLeft       RN 10
780c1bc742181ded4930842b46e9507372f0b1b963James Dong
790c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers (r0-r3 hold arguments on entry; r4-r7 are filled from the stack by M_LDR)
810c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
820c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft        RN 0    ;// input pointer: left neighbours, read one byte at a time, stepping by leftStep
830c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAbove       RN 1    ;// input pointer: row above, read as packed 32-bit words
840c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAboveLeft   RN 2    ;// input pointer (not read by the VERT, HOR or DC cases)
850c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 3    ;// output pointer: one 32-bit word stored per row, stepping by dstStep
860c1bc742181ded4930842b46e9507372f0b1b963James DongleftStep        RN 4    ;// input variable: step between successive pSrcLeft byte loads
870c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 5    ;// input variable: step between successive output rows
880c1bc742181ded4930842b46e9507372f0b1b963James DongpredMode        RN 6    ;// input variable: index into armVCM4P10_pSwitchTable4x4
890c1bc742181ded4930842b46e9507372f0b1b963James Dongavailability    RN 7    ;// input variable: flags tested against OMX_VC_UPPER / OMX_VC_LEFT / OMX_VC_UPPER_RIGHT
900c1bc742181ded4930842b46e9507372f0b1b963James Dong
910c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
920c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_4x4 starts
930c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
940c1bc742181ded4930842b46e9507372f0b1b963James Dong
950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Write function header (M_START, M_ARG and M_LDR are macros, presumably from armCOMM_s.h)
960c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_PredictIntra_4x4, r11
970c1bc742181ded4930842b46e9507372f0b1b963James Dong
980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments (four 4-byte arguments, declared in this order)
990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    LeftStep,     4
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    DstStep,      4
1010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    PredMode,     4
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    Availability, 4
1030c1bc742181ded4930842b46e9507372f0b1b963James Dong
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong;// NOTE(review): predMode indexes the 9-entry table below without a range check; callers must pass 0..8.
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load argument from the stack
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong;// pTable (r9), predMode (r6) and availability (r7) are reused as scratch registers inside the cases.
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on predMode: pc = pTable[predMode]
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong
1160c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_VERT
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Vertical prediction: each of the four output rows is a copy of the four bytes at pSrcAbove.
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3], loaded as one 32-bit word
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Above0123,  [pDst],  dstStep        ;// row 0 = Above0123 (M_STR presumably post-increments pDst by dstStep)
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Above0123,  [pDst],  dstStep        ;// row 1 = Above0123
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Above0123,  [pDst],  dstStep        ;// row 2 = Above0123
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Above0123,  [pDst]                  ;// row 3 = Above0123 (no further pointer update needed)
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = OMX_Sts_NoErr
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                      ;// Macro to exit midway - break from case
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong
1260c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_HOR
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Horizontal prediction: output row i is the left-neighbour byte i replicated four times.
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=6
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r0x01010101 is r1 (pSrcAbove); overwriting it is safe because this case never reads pSrcAbove.
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = left byte 0; overwrites predMode (r6), already consumed
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = left byte 1; overwrites availability (r7), unused here
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = left byte 2
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = left byte 3; overwrites pTable (r9), already consumed
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out0,   Left0,   r0x01010101        ;// Out0 = Left0 in all four bytes (Out0 and Left0 are both r6)
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out1,   Left1,   r0x01010101        ;// Out1 = Left1 in all four bytes
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out2,   Left2,   r0x01010101        ;// Out2 = Left2 in all four bytes
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out3,   Left3,   r0x01010101        ;// Out3 = Left3 in all four bytes
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 0 = Out0
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst],  dstStep            ;// row 1 = Out1
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst],  dstStep            ;// row 2 = Out2
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// row 3 = Out3
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = OMX_Sts_NoErr (pSrcLeft is no longer needed)
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong
1460c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_DC
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong;// DC prediction. This path handles upper AND left available: fill = (sum of 4 above + sum of 4 left + 4) >> 3.
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=6
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      availability,  availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)  ;// keep only the UPPER and LEFT flags
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)                 ;// are both flags set?
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      UpperOrLeftOrNoneAvailable          ;// Not both: handle upper-only, left-only or none
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong;// tVal6/tVal7 are r6/r7, the same registers as Left0/Left1; they are consumed by UADD16 before the left loads.
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2], one per halfword
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3], one per halfword
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal11, tVal6,   tVal7              ;// low half = [0] + [1], high half = [2] + [3]
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = left byte 0
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = left byte 1
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal11, tVal11,  LSR #16            ;// low half = sum(pSrcAbove[0] to pSrcAbove[3]); high half is junk
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = left byte 2
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = left byte 3
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal11, tVal11                      ;// tVal11 = sum of the four above bytes (clear the top junk bits)
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  Left0,  Left1               ;// tVal6 = Left0 + Left1
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  Left2,  Left3               ;// tVal7 = Left2 + Left3
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,  tVal7               ;// tVal6 = sum of the four left bytes
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   tVal6,  tVal11              ;// Out0  = left sum + above sum
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   Out0,   #4                  ;// Out0  = Out0 + 4 (rounding term for the >>3 below)
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (overwrites pSrcAbove in r1, no longer needed)
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      Out0,   Out0,  LSR #3               ;// Out0 = (Out0 + 4)>>3
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return,  #OMX_Sts_NoErr             ;// r0 = OMX_Sts_NoErr (pSrcLeft in r0 is already consumed)
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 0 = Out0
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 1 = Out0
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 2 = Out0
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out0,   [pDst]                      ;// row 3 = Out0
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong
1870c1bc742181ded4930842b46e9507372f0b1b963James DongUpperOrLeftOrNoneAvailable
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong;// DC prediction, upper-only path: fill = (sum of 4 above + 2) >> 2. availability was masked to UPPER|LEFT by the DC entry code.
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      availability,  #OMX_VC_UPPER        ;// is UPPER the only flag left after masking?
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LeftOrNoneAvailable                 ;// No: handle left-only or none
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2], one per halfword
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3], one per halfword
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   Out0,   tVal6,  tVal7               ;// low half = [0] + [1], high half = [2] + [3]
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (overwrites pSrcAbove in r1, no longer needed)
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   Out0,   LSR #16             ;// low half = sum(pSrcAbove[0] to pSrcAbove[3]); high half is junk
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     Out0,   Out0                        ;// Out0 = sum of the four above bytes (clear the top junk bits)
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2 (rounding term for the >>2 below)
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = OMX_Sts_NoErr
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 0 = Out0
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 1 = Out0
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 2 = Out0
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out0,   [pDst]                      ;// row 3 = Out0
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong
2250c1bc742181ded4930842b46e9507372f0b1b963James DongLeftOrNoneAvailable
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// DC prediction, left-only path: fill = (sum of 4 left + 2) >> 2. The constant is loaded first because NoneAvailable also uses it.
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (overwrites pSrcAbove in r1, not read on this path)
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      availability, #OMX_VC_LEFT          ;// is LEFT the only flag left after masking?
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      NoneAvailable                       ;// No: neither upper nor left is available
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = left byte 0
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = left byte 1; overwrites availability (r7), already tested
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = left byte 2
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = left byte 3
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   Left0,  Left1               ;// Out0  = Left0 + Left1
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out1,   Left2,  Left3               ;// Out1  = Left2 + Left3 (Out1 is used as a temporary here)
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   Out0,   Out1                ;// Out0  = sum of the four left bytes
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2 (rounding term for the >>2 below)
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = OMX_Sts_NoErr (pSrcLeft in r0 is already consumed)
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 0 = Out0
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 1 = Out0
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 2 = Out0
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out0,   [pDst]                      ;// row 3 = Out0
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong
2600c1bc742181ded4930842b46e9507372f0b1b963James DongNoneAvailable
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      Out0,   #128                        ;// Out0 = 128: fill value when neither upper nor left is available
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// r0x01010101 is not loaded here; it was set in LeftOrNoneAvailable just before the branch to this label.
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=5
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes (Out0 = 0x80808080)
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = OMX_Sts_NoErr
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 0 = Out0
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 1 = Out0
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// row 2 = Out0
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out0,   [pDst]                      ;// row 3 = Out0
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong
2760c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_DIAG_DL
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//------------------------------------------------------------------
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// f = (a+2*b+c+2)>>2
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate as:
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// d = (a + c )>>1
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// e = (d - b')>>1
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// f = e + 128
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//------------------------------------------------------------------
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_UPPER_RIGHT
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDMIA    pSrcAbove,  {Above0123, Above4567}  ;// Above0123, Above4567 = pSrcAbove[0 to 7]
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      DLUpperRightAvailable
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal7,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal11, tVal7,  LSL #24             ;// {U3,  00,  00,  00 }
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out3,   tVal7,  r0x01010101         ;// {U3,  U3,  U3,  U3 }
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal8,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal8,  tVal8,  Above0123           ;// {xx,  xx,  d1,  d0 }
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal6,  Above0123,  tVal9           ;// {xx,  d2,  xx,  xx }
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal8,  tVal8,  tVal10              ;// {xx,  xx,  e1,  e0 }
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal6,  tVal6,  tVal10              ;// {xx,  e2,  xx,  xx }
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal8,  tVal8,  r0x80808080         ;// {xx,  xx,  f1,  f0 }
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal6,  tVal6,  r0x80808080         ;// {xx,  f2,  xx,  xx }
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    tVal6,  tVal8,  tVal6               ;// {xx,  f2,  f1,  f0 }
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        BIC      tVal6,  tVal6,  #0xFF000000         ;// {00,  f2,  f1,  f0 }
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out0,   tVal6,  tVal11              ;// {U3,  f2,  f1,  f0 }
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB    Out1,   Out3,   Out0,  ASR #8       ;// {U3,  U3,  f2,  f1 }
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB    Out2,   Out3,   Out1,  ASR #8       ;// {U3,  U3,  U3,  f2 }
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDSt[15 to 12]
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway-break frm case
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong
3240c1bc742181ded4930842b46e9507372f0b1b963James DongDLUpperRightAvailable
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Builds rows {f3..f0},{f4..f1},{f5..f2},{f6..f3} from U0..U7; Above0123, Above4567 and r0x80808080 are assumed to be set before this label
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal8,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal8,  tVal8,  Above4567, LSL #8   ;// {U6,  U5,  U4,  U3 }
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// {U5,  U4,  U3,  U2 }
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'}  (x' = 255 - x, used as the UHSUB8 operand below)
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal6,  Above0123,  tVal9           ;// {d3,  d2,  d1,  d0 }  d[n] = (U[n] + U[n+2]) >> 1
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal9,  tVal9,  Above4567           ;// {d5,  d4,  d3,  d2 }
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal8,  Above4567,  tVal8           ;// {d6,  xx,  xx,  xx }  d6 = (U7 + U6) >> 1; lower lanes are not used
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal6,  tVal6,  tVal10              ;// {e3,  e2,  e1,  e0 }  e[n] = (d[n] - (255 - U[n+1])) >> 1
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal12, tVal9,  tVal1               ;// {e5,  e4,  e3,  e2 }
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal8,  tVal8,  tVal2               ;// {e6,  xx,  xx,  xx }  e6 = (d6 - (255 - U7)) >> 1
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out0,   tVal6,  r0x80808080         ;// {f3,  f2,  f1,  f0 }  f[n] = e[n] + 0x80 = (U[n] + 2*U[n+1] + U[n+2] + 2) >> 2
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal9,  tVal8,  r0x80808080         ;// {f6,  xx,  xx,  xx }  f6 = (U6 + 3*U7 + 2) >> 2
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal7,  Out0,   LSR #8              ;// {00,  f3,  f2,  f1 }
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      tVal9,  tVal9,  #0xFF000000         ;// {f6,  00,  00,  00 }  drop the unused lower lanes
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    Out1,   tVal7,  Out2,  LSL #8       ;// {f4,  f3,  f2,  f1 }
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out3,   tVal9,  Out2,  LSR #8       ;// {f6,  f5,  f4,  f3 }
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ], then advance pDst by dstStep
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDst[15 to 12]
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong
3560c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_DIAG_DR
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case OMX_VC_4x4_DIAG_DR: builds the 4x4 block from L0..L3 (pSrcLeft, stride leftStep), UL (pSrcAboveLeft) and U0..U3 (pSrcAbove)
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Per byte lane F(a,b,c) = (a + 2b + c + 2) >> 2 is formed as UHADD8 (a,c), UHSUB8 against ~b, then UADD8 with 0x80808080
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL  (centre taps for Out0)
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2  (centre taps for Out3)
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0  (halved sums of the outer taps)
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3  i0=F(L0,UL,U0) i1=F(UL,L0,L1) i2=F(L0,L1,L2) i3=F(L1,L2,L3)
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0  f1=F(U1,U2,U3) f0=F(U0,U1,U2) fL=F(UL,U0,U1)
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} at pDst[3  to 0 ], then advance pDst by dstStep
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} at pDst[7  to 4 ]
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} at pDst[11 to 8 ]
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// store {i0 to i3} at pDst[15 to 12]
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return,  #OMX_Sts_NoErr
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong
3970c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_VR
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case OMX_VC_4x4_VR: builds the 4x4 block from U0..U3 (pSrcAbove), UL (pSrcAboveLeft) and L0..L2 (only three left pixels are loaded)
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// e* lanes are (a + b + 1) >> 1; i*/l* lanes are (a + 2b + c + 2) >> 2; both come from UHSUB8 against a complemented operand followed by UADD8 with 0x80808080
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal4,  Left0                       ;// tVal4     = FF FF FF L0'
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 80 80 80 k1  (upper lanes: (0 - 0xFF) halved)
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL  eL = (UL + U0 + 1) >> 1, e0 = (U0 + U1 + 1) >> 1, ...
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} at pDst[3  to 0 ], then advance pDst by dstStep
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1  (0x80 + 0x80 wraps the upper lanes back to 00)
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} at pDst[7  to 4 ]
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} at pDst[11 to 8 ]
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// store {i2 to l2} at pDst[15 to 12]
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return,  #OMX_Sts_NoErr
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong
4390c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_HD
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case OMX_VC_4x4_HD: builds the 4x4 block from U0..U3 (pSrcAbove; U3 only feeds the unused top lane), UL (pSrcAboveLeft) and L0..L3 (pSrcLeft, stride leftStep)
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
4420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// e* lanes are (a + b + 1) >> 1; i*/l* lanes are (a + 2b + c + 2) >> 2; both come from UHSUB8 against a complemented operand followed by UADD8 with 0x80808080
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
4490c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
4510c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal1,  Left0                       ;// tVal1 = FF FF FF L0'
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = 80 80 80 dL  (upper lanes: (0 - 0xFF) halved)
4560c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL  eL = (UL + L0 + 1) >> 1; upper lanes wrap back to 00
4580c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
4590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2
4630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ], then advance pDst by dstStep
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i0 eL 00 00
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal12, tVal11                      ;// tVal12= FF L0'FF L1'
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal14, tVal7                       ;// tVal14= FF L1'FF L2'
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = 80 k1 80 k2
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = 80 d1 80 d2
4730c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= 80 k2 80 k3
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = 80 d2 80 d3
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2  (0x80 lanes wrap back to 00, so the ORRs below can merge)
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
4800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i0 eL l1 e1
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return,  #OMX_Sts_NoErr
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong
4880c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_VL
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case OMX_VC_4x4_VL: builds the 4x4 block from U0..U7 (pSrcAbove); if OMX_VC_UPPER_RIGHT is clear in availability, U4..U7 are replaced by copies of U3
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// e* lanes are (a + b + 1) >> 1; i* lanes are (a + 2b + c + 2) >> 2; both come from UHSUB8 against a complemented operand followed by UADD8 with 0x80808080
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_UPPER_RIGHT   ;// EQ when the OMX_VC_UPPER_RIGHT bit is clear; consumed by MULEQ below
4940c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
4950c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
4960c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
4970c1bc742181ded4930842b46e9507372f0b1b963James Dong        MULEQ    Above4567, tVal11, r0x01010101      ;// Above4567 = U3 U3 U3 U3 (executed only if the TST result was EQ)
4980c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
4990c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
5000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
5010c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
5020c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out0,   tVal8,  r0x80808080         ;// Out0 = e4 e3 e2 e1  e[n] = (U[n] + U[n-1] + 1) >> 1
5030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ], then advance pDst by dstStep
5040c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
5050c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
5060c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
5070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
5080c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
5090c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3
5100c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
5110c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2'
5120c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2
5130c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
5140c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
5150c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2  i[n] = (U[n-2] + 2*U[n-1] + U[n] + 2) >> 2
5160c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
5170c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
5180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
5190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
5200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out3,   [pDst], dstStep             ;// store {Out3} at pDst [12 to 15]; NOTE(review): other cases use a plain STR here - pDst is not read again in this case
5210c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
5220c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from case
5230c1bc742181ded4930842b46e9507372f0b1b963James Dong
5240c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_4x4_HU
5250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Case OMX_VC_4x4_HU: builds the 4x4 block from L0..L3 only (pSrcLeft, stride leftStep); the last row is L3 replicated
5260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
5270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// e* lanes are (a + b + 1) >> 1; i* lanes are (a + 2b + c + 2) >> 2; both come from UHSUB8 against a complemented operand followed by UADD8 with 0x80808080
5280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
5290c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
5300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
5310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
5320c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
5330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x80808080 (derived by shifting, instead of a second literal load)
5340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
5350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
5360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
5370c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
5380c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal8,  tVal7                       ;// tVal8 = FF L2'FF L1'
5390c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN      tVal10, tVal11                      ;// tVal10= FF L3'FF L2'
5400c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2
5410c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
5420c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = 80 h3 80 h2  (0x80 lanes: (0 - 0xFF) halved)
5430c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = 80 d2 80 d1
5440c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= 80 d3 80 d2
5450c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
5460c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2  (0x80 lanes wrap back to 00, so the ORRs below can merge)
5470c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= 80 h4 80 h3
5480c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1  e1 = (L0 + L1 + 1) >> 1, e2 = (L1 + L2 + 1) >> 1
5490c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
5500c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3  i4 = (L2 + 3*L3 + 2) >> 2
5510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1
5520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
5530c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ], then advance pDst by dstStep
5540c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
5550c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
5560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
5570c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
5580c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return,  #OMX_Sts_NoErr
5590c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
5600c1bc742181ded4930842b46e9507372f0b1b963James Dong
5610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// ARM1136JS
5620c1bc742181ded4930842b46e9507372f0b1b963James Dong
5630c1bc742181ded4930842b46e9507372f0b1b963James Dong
5640c1bc742181ded4930842b46e9507372f0b1b963James Dong        END
5650c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
5660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_4x4 ends
5670c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
568