10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_PredictIntraChroma_8x8_s.s
40c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
50c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
120c1bc742181ded4930842b46e9507372f0b1b963James Dong
130c1bc742181ded4930842b46e9507372f0b1b963James Dong
140c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
150c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
160c1bc742181ded4930842b46e9507372f0b1b963James Dong
170c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT armVCM4P10_pIndexTable8x8
180c1bc742181ded4930842b46e9507372f0b1b963James Dong
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Define the processor variants supported by this file
200c1bc742181ded4930842b46e9507372f0b1b963James Dong
210c1bc742181ded4930842b46e9507372f0b1b963James Dong         M_VARIANTS ARM1136JS
220c1bc742181ded4930842b46e9507372f0b1b963James Dong
230c1bc742181ded4930842b46e9507372f0b1b963James Dong     AREA table, DATA
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Jump table that implements a C-style switch/case in asm
260c1bc742181ded4930842b46e9507372f0b1b963James Dong;// by the method of two levels of indexing.
;//
;// Each entry is the address of the code label that handles one
;// prediction mode. The function loads the table base into pTable
;// and dispatches with
;//     LDR pc, [pTable, predMode, LSL #2]
;// so the order of the entries must match the numeric predMode values.
270c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
280c1bc742181ded4930842b46e9507372f0b1b963James Dong
290c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_pIndexTable8x8
300c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_CHROMA_DC,     OMX_VC_CHROMA_HOR      ;// entries 0 and 1
310c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_CHROMA_VERT,   OMX_VC_CHROMA_PLANE    ;// entries 2 and 3
320c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Table of 16-bit (DCW) signed constants, three rows of four values.
;// No code in the part of this file reviewed here references it.
;// NOTE(review): presumably consumed by the OMX_VC_CHROMA_PLANE case - confirm.
;// NOTE(review): the meaning of the second M_TABLE argument (1) is defined by
;// the macro in armCOMM_s.h - confirm before changing it.
330c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
340c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCW   3, 2, 1,4
350c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCW  -3,-2,-1,0
360c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCW   1, 2, 3,4
370c1bc742181ded4930842b46e9507372f0b1b963James Dong
380c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
390c1bc742181ded4930842b46e9507372f0b1b963James Dong
400c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
410c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Constants
420c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
430c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// BLK_SIZE    : value the row counter y is initialised with at function entry.
;// MUL_CONST0  : multiplier that copies a value in the range 0..255 into all
;//               four bytes of a word (v * 0x01010101); loaded into r0x01010101.
;// MASK_CONST  : not referenced in the part of this file reviewed here.
;//               NOTE(review): the alias r0x00FF00FF suggests it is loaded into
;//               that register by a later case - confirm.
;// MUL_CONST1  : word written to every output position by the DC case when
;//               none of its availability tests match (each byte is 0x80).
440c1bc742181ded4930842b46e9507372f0b1b963James DongBLK_SIZE        EQU 0x8
450c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST0      EQU 0x01010101
460c1bc742181ded4930842b46e9507372f0b1b963James DongMASK_CONST      EQU 0x00FF00FF
470c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST1      EQU 0x80808080
480c1bc742181ded4930842b46e9507372f0b1b963James Dong
490c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Scratch variable
510c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
;// Symbolic names for registers (RN). Several names map to the SAME physical
;// register, and many also overlap the input-argument aliases (r0-r7), so
;// writing one name destroys every other name bound to that register:
;//   r0  : return, tVal0, p2p0            (also pSrcLeft)
;//   r1  : pSrcLeft2, tVal1, p3p1         (also pSrcAbove)
;//   r2  : pDst2, tVal2, p6p4             (also pSrcAboveLeft)
;//   r4  : tVal4, p7p5                    (also leftStep)
;//   r6  : sum1, tVal6, pp2pp0            (also predMode)
;//   r7  : sum2, tVal7, pp3pp1            (also availability)
;//   r9  : pTable, tVal9, pp7pp5
;//   r10 : r0x01010101, tVal10, p3210, p7654
;//   r11 : dstStepx2, r0x00FF00FF, tVal11
;//   r12 : y, tVal12, c
;//   r14 : leftStepx2, outerCount, tVal14, b
;// The order of instructions in the code therefore matters; check this map
;// before reordering or inserting anything.
520c1bc742181ded4930842b46e9507372f0b1b963James Dongy               RN 12
530c1bc742181ded4930842b46e9507372f0b1b963James Dongpc              RN 15
540c1bc742181ded4930842b46e9507372f0b1b963James Dongreturn          RN 0
550c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft2       RN 1
560c1bc742181ded4930842b46e9507372f0b1b963James DongpDst2           RN 2
570c1bc742181ded4930842b46e9507372f0b1b963James Dongsum1            RN 6
580c1bc742181ded4930842b46e9507372f0b1b963James Dongsum2            RN 7
590c1bc742181ded4930842b46e9507372f0b1b963James DongpTable          RN 9
600c1bc742181ded4930842b46e9507372f0b1b963James DongdstStepx2       RN 11
610c1bc742181ded4930842b46e9507372f0b1b963James DongleftStepx2      RN 14
620c1bc742181ded4930842b46e9507372f0b1b963James DongouterCount      RN 14
630c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x01010101     RN 10
640c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00FF00FF     RN 11
650c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Generic temporaries, tValN == rN (there is no tVal13).
660c1bc742181ded4930842b46e9507372f0b1b963James DongtVal0           RN 0
670c1bc742181ded4930842b46e9507372f0b1b963James DongtVal1           RN 1
680c1bc742181ded4930842b46e9507372f0b1b963James DongtVal2           RN 2
690c1bc742181ded4930842b46e9507372f0b1b963James DongtVal3           RN 3
700c1bc742181ded4930842b46e9507372f0b1b963James DongtVal4           RN 4
710c1bc742181ded4930842b46e9507372f0b1b963James DongtVal5           RN 5
720c1bc742181ded4930842b46e9507372f0b1b963James DongtVal6           RN 6
730c1bc742181ded4930842b46e9507372f0b1b963James DongtVal7           RN 7
740c1bc742181ded4930842b46e9507372f0b1b963James DongtVal8           RN 8
750c1bc742181ded4930842b46e9507372f0b1b963James DongtVal9           RN 9
760c1bc742181ded4930842b46e9507372f0b1b963James DongtVal10          RN 10
770c1bc742181ded4930842b46e9507372f0b1b963James DongtVal11          RN 11
780c1bc742181ded4930842b46e9507372f0b1b963James DongtVal12          RN 12
790c1bc742181ded4930842b46e9507372f0b1b963James DongtVal14          RN 14
800c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// The names below (b, c, p*, pp*) are not used by the DC case or by the
;// visible start of the horizontal case.
;// NOTE(review): presumably they belong to the plane-prediction case - confirm.
810c1bc742181ded4930842b46e9507372f0b1b963James Dongb               RN 14
820c1bc742181ded4930842b46e9507372f0b1b963James Dongc               RN 12
830c1bc742181ded4930842b46e9507372f0b1b963James Dong
840c1bc742181ded4930842b46e9507372f0b1b963James Dongp2p0            RN 0
850c1bc742181ded4930842b46e9507372f0b1b963James Dongp3p1            RN 1
860c1bc742181ded4930842b46e9507372f0b1b963James Dongp6p4            RN 2
870c1bc742181ded4930842b46e9507372f0b1b963James Dongp7p5            RN 4
880c1bc742181ded4930842b46e9507372f0b1b963James Dong
890c1bc742181ded4930842b46e9507372f0b1b963James Dongpp2pp0          RN 6
900c1bc742181ded4930842b46e9507372f0b1b963James Dongpp3pp1          RN 7
910c1bc742181ded4930842b46e9507372f0b1b963James Dongpp6pp4          RN 8
920c1bc742181ded4930842b46e9507372f0b1b963James Dongpp7pp5          RN 9
930c1bc742181ded4930842b46e9507372f0b1b963James Dong
940c1bc742181ded4930842b46e9507372f0b1b963James Dongp3210           RN 10
950c1bc742181ded4930842b46e9507372f0b1b963James Dongp7654           RN 10
960c1bc742181ded4930842b46e9507372f0b1b963James Dong
970c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
980c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Input Arguments
990c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
;// r0-r3 are used by the code as they arrive at function entry. r4-r7 are
;// filled by the function itself from the stack arguments LeftStep, DstStep,
;// PredMode and Availability (M_LDR at function entry).
;// All eight registers are reused as scratch later (see the tVal*/sum* aliases).
1000c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft        RN 0    ;// input pointer: left neighbour pixels, one per row, leftStep bytes apart
1010c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAbove       RN 1    ;// input pointer: the 8 pixels above the block (read as two words)
1020c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAboveLeft   RN 2    ;// input pointer: not read by the DC case
1030c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 3    ;// output pointer: destination block, rows dstStep bytes apart
1040c1bc742181ded4930842b46e9507372f0b1b963James DongleftStep        RN 4    ;// input variable: byte step between consecutive pSrcLeft pixels
1050c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 5    ;// input variable: byte step between consecutive pDst rows
1060c1bc742181ded4930842b46e9507372f0b1b963James DongpredMode        RN 6    ;// input variable: index into armVCM4P10_pIndexTable8x8
1070c1bc742181ded4930842b46e9507372f0b1b963James Dongavailability    RN 7    ;// input variable: flag word; the DC case tests OMX_VC_UPPER and OMX_VC_LEFT
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntraChroma_8x8 starts
;//
;// Entry sequence and mode dispatch.
;//   In registers : r0 pSrcLeft, r1 pSrcAbove, r2 pSrcAboveLeft, r3 pDst
;//   On the stack : LeftStep, DstStep, PredMode, Availability (4 bytes each)
;//   Dispatch     : pc is loaded from armVCM4P10_pIndexTable8x8[predMode], so
;//                  control continues at one of the OMX_VC_CHROMA_* labels.
;//   Return       : each DC sub-case sets return (r0) to OMX_Sts_NoErr and
;//                  leaves through M_EXIT.
;//
;// NOTE(review): predMode is not range-checked before it indexes the jump
;// table; a value outside the table loads an arbitrary word into pc.
;// NOTE(review): M_START/M_ARG/M_LDR/M_EXIT come from armCOMM_s.h. The code
;// writes r4-r12 and r14, so M_START ..., r11 presumably saves r4-r11 and lr -
;// confirm against the macro definition.
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Write function header
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_PredictIntraChroma_8x8, r11
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments (name, size in bytes), in stack order
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    LeftStep,     4
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    DstStep,      4
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    PredMode,     4
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    Availability, 4
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pTable,=armVCM4P10_pIndexTable8x8   ;// pTable = base address of the jump table
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load arguments from the stack
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #BLK_SIZE                        ;// Outer loop count (r12; the DC case reuses r12 as tVal12)
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case selected by predMode (word index)
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// DC prediction. This label first classifies availability, then handles the
;// sub-case where BOTH the upper row and the left column are available:
;//   upsum1   = sum of pSrcAbove[0..3]     upsum2   = sum of pSrcAbove[4..7]
;//   leftsum1 = sum of left pixels 0..3    leftsum2 = sum of left pixels 4..7
;//   rows 0-3 : bytes 0-3 = (upsum1 + leftsum1 + 4) >> 3, bytes 4-7 = (upsum2 + 2) >> 2
;//   rows 4-7 : bytes 0-3 = (leftsum2 + 2) >> 2,          bytes 4-7 = (upsum2 + leftsum2 + 4) >> 3
;// In the comments below "pSrcLeft[n]" means the n-th left pixel, i.e. the
;// byte at pSrcLeft + n * leftStep.
;// NOTE(review): the pSrcAbove[...] lane comments assume little-endian word
;// loads - confirm for the target configuration.
1360c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_DC
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT)
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// both flags set? (masked value == UPPER + LEFT)
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101, =MUL_CONST0            ;// byte-replication multiplier, also used by TST_UPPER/TST_LEFT; flags from CMP are kept
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      TST_UPPER                           ;// Jump to Upper if not both
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM      pSrcAbove,{tVal8,tVal9}             ;// tVal 8 to 9 = pSrcAbove[0 to 7]; must precede the write to pSrcLeft2 (same register, r1)
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep (overwrites pSrcAbove)
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal7, tVal8                        ;// pSrcAbove[0, 2]
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   sum1, tVal7, tVal8                  ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal7, tVal9                        ;// pSrcAbove[4, 6]
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   sum2, tVal7, tVal9                  ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1, sum1, sum1, LSR #16           ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2, sum2, sum2, LSR #16           ;// low halfword = sum(pSrcAbove[4] to pSrcAbove[7])
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum1, sum1                          ;// upsum1 (Clear the top junk bits)
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum2, sum2                          ;// upsum2 (Clear the top junk bits)
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Left column: two pointers one row apart, each stepping two rows.
        ;// tVal4 is r4 (leftStep); leftStep is dead once leftStepx2 exists.
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[2]
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[3]
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal8, tVal9                 ;// tVal2 = pSrcLeft[0] + pSrcLeft[1]
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[4]
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[5]
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal14, tVal4, tVal12               ;// tVal14 = pSrcLeft[2] + pSrcLeft[3]; r14 no longer holds leftStepx2
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Last two loads need no pointer update (leftStepx2 was just overwritten).
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal4, [pSrcLeft]                   ;// tVal4 = pSrcLeft[6]
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal12,[pSrcLeft2]                  ;// tVal12= pSrcLeft[7]
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = pSrcLeft[4] + pSrcLeft[5]
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal2, tVal14                ;// leftsum1  = sum(pSrcLeft[0] to pSrcLeft[3])
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal4, tVal4, tVal12                ;// tVal4 = pSrcLeft[6] + pSrcLeft[7]
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal14, tVal8, tVal4                ;// leftsum2  = sum(pSrcLeft[4] to pSrcLeft[7])
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal14, #2                   ;// tVal8 = leftsum2 + 2
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal9, sum2,   #2                   ;// tVal9 = upsum2 + 2
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1,  sum1, tVal2                  ;// sum1 = upsum1 + leftsum1
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2,  sum2, tVal14                 ;// sum2 = upsum2 + leftsum2
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1, sum1, #4                      ;// (sum1 + 4)
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2, sum2, #4                      ;// (sum2 + 4)
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      sum1,  sum1,  LSR #3                ;// (upsum1 + leftsum1 + 4)>>3
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9, tVal9, LSR #2                ;// (upsum2 + 2)>>2
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal8, tVal8, LSR #2                ;// (leftsum2 + 2)>>2
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      sum2,  sum2,  LSR #3                ;// (upsum2 + leftsum2 + 4)>>3
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal0, sum1, r0x01010101            ;// rows 0-3, bytes 0-3: value replicated in all the bytes
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal1, tVal9,r0x01010101            ;// rows 0-3, bytes 4-7: value replicated in all the bytes
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal8, tVal8,r0x01010101            ;// rows 4-7, bytes 0-3: value replicated in all the bytes
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal9, sum2, r0x01010101            ;// rows 4-7, bytes 4-7: value replicated in all the bytes
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Each M_STRD writes one 8-byte row and is given dstStep as the
        ;// post-index step to the next row.
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 0 = tVal 0 to 1
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 1 = tVal 0 to 1
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 2 = tVal 0 to 1
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 3 = tVal 0 to 1
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 4 = tVal 8 to 9
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 5 = tVal 8 to 9
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 6 = tVal 8 to 9
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 7 = tVal 8 to 9
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// DC prediction, sub-case "only the upper row is available".
;// Entered from OMX_VC_CHROMA_DC with availability already masked to the
;// UPPER/LEFT flags and r0x01010101 already loaded with MUL_CONST0.
;// Every row is written as: bytes 0-3 = (sum of pSrcAbove[0..3] + 2) >> 2,
;//                           bytes 4-7 = (sum of pSrcAbove[4..7] + 2) >> 2.
2040c1bc742181ded4930842b46e9507372f0b1b963James DongTST_UPPER
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      availability, #OMX_VC_UPPER         ;// masked availability == OMX_VC_UPPER (upper only)?
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      TST_LEFT                            ;// Jump to Left if not upper
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM      pSrcAbove,{tVal8,tVal9}             ;// tVal 8 to 9 = pSrcAbove[0 to 7]
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal7, tVal8                        ;// pSrcAbove[0, 2]
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   sum1,  tVal7, tVal8                 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal7, tVal9                        ;// pSrcAbove[4, 6]
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   sum2,  tVal7, tVal9                 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1, sum1, sum1, LSR #16           ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2, sum2, sum2, LSR #16           ;// low halfword = sum(pSrcAbove[4] to pSrcAbove[7])
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum1, sum1                          ;// upsum1 (Clear the top junk bits)
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum2, sum2                          ;// upsum2 (Clear the top junk bits)
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1, sum1, #2                      ;// sum1 + 2
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2, sum2, #2                      ;// sum2 + 2
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      sum1, sum1, LSR #2                  ;// (sum1 + 2)>>2
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      sum2, sum2, LSR #2                  ;// (sum2 + 2)>>2
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      sum1, sum1,r0x01010101              ;// replicate the val in all the bytes
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      sum2, sum2,r0x01010101              ;// replicate the val in all the bytes
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// tVal6/tVal7 are the same registers as sum1/sum2 (r6/r7).
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4 = tVal 6 to 7
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5 = tVal 6 to 7
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6 = tVal 6 to 7
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7 = tVal 6 to 7
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// DC prediction, sub-case "only the left column is available".
;// Entered from TST_UPPER with availability already masked to the UPPER/LEFT
;// flags and r0x01010101 already loaded with MUL_CONST0.
;// Rows 0-3 are filled with (sum of left pixels 0..3 + 2) >> 2 in all 8 bytes,
;// rows 4-7 with (sum of left pixels 4..7 + 2) >> 2 in all 8 bytes.
;// "pSrcLeft[n]" in the comments means the byte at pSrcLeft + n * leftStep.
2490c1bc742181ded4930842b46e9507372f0b1b963James DongTST_LEFT
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      availability, #OMX_VC_LEFT          ;// masked availability == OMX_VC_LEFT (left only)?
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      TST_COUNT0                          ;// no: fall back to the constant fill
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// tVal4 is r4 (leftStep); leftStep is dead once leftStepx2 exists.
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[2]
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[3]
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal8, tVal9                 ;// tVal6 = pSrcLeft[0] + pSrcLeft[1]
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[4]
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal4, tVal12                ;// tVal7 = pSrcLeft[2] + pSrcLeft[3]
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[5]
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[6] (the pointer update is not used afterwards)
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[7] (the pointer update is not used afterwards)
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = pSrcLeft[4] + pSrcLeft[5]
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1,  tVal6, tVal7                 ;// sum1  = sum(pSrcLeft[0] to pSrcLeft[3])
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal4, tVal4, tVal12                ;// tVal4 = pSrcLeft[6] + pSrcLeft[7]
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2,  tVal8, tVal4                 ;// sum2  = sum(pSrcLeft[4] to pSrcLeft[7])
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum1, sum1, #2                      ;// sum1 + 2
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum2, sum2, #2                      ;// sum2 + 2
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      sum1, sum1, LSR #2                  ;// (sum1 + 2)>>2
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      sum2, sum2, LSR #2                  ;// (sum2 + 2)>>2
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal6, sum1,r0x01010101             ;// replicate the val in all the bytes (tVal6 and sum1 are both r6)
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal8, sum2,r0x01010101             ;// replicate the val in all the bytes
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal7,tVal6                         ;// tVal7 = tVal6, so bytes 4-7 of rows 0-3 match bytes 0-3
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal9,tVal8                         ;// tVal9 = tVal8, so bytes 4-7 of rows 4-7 match bytes 0-3
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 4 = tVal 8 to 9
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 5 = tVal 8 to 9
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 6 = tVal 8 to 9
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 7 = tVal 8 to 9
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway-break frm case
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// DC prediction, fallback sub-case: reached when the masked availability
;// matched none of "upper and left", "upper only" or "left only".
;// Fills all 8 rows with the constant word MUL_CONST1 (every byte 0x80).
3010c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT0
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      sum1, =MUL_CONST1                  ;// sum1 = 0x80808080 (sum1 is r6, i.e. tVal6)
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal7, sum1                         ;// tVal7 = sum1, second word of each row
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4 = tVal 6 to 7
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5 = tVal 6 to 7
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6 = tVal 6 to 7
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7 = tVal 6 to 7
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway-break frm case
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong
3200c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_HOR
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Horizontal prediction: each of the 8 destination rows is filled with
        ;// that row's left-neighbour byte replicated across all 8 pixels.
        ;// pSrcLeft/pDst walk the even rows and pSrcLeft2/pDst2 the odd rows, so
        ;// every pointer advances two rows per access. Each row is written as two
        ;// word stores: the first post-increments by 4, the second by
        ;// (2*dstStep - 4), which lands the pointer on the row two below.
        ;// NOTE(review): M_LDRB/M_STR are macros defined outside this chunk; the
        ;// operands read as post-indexed register-offset accesses - confirm.
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep (odd rows)
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      leftStepx2, leftStep, leftStep      ;// leftStepx2 = leftStep * 2
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (odd rows)
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = dstStep * 2
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      dstStepx2, dstStepx2, #4            ;// minus the 4 bytes already stepped by the first store
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101, =MUL_CONST0            ;// Const to repeat the byte in reg 4 times
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal6, [pSrcLeft], +leftStepx2      ;// tVal6 = left pixel of row 0
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal7, [pSrcLeft2],+leftStepx2      ;// tVal7 = left pixel of row 1
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft], +leftStepx2      ;// tVal8 = left pixel of row 2
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2],+leftStepx2      ;// tVal9 = left pixel of row 3
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// row 0, bytes 0 to 3 = tVal6
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// row 1, bytes 0 to 3 = tVal7
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal6, [pDst],  dstStepx2           ;// row 0, bytes 4 to 7 = tVal6; pDst  -> row 2
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal7, [pDst2], dstStepx2           ;// row 1, bytes 4 to 7 = tVal7; pDst2 -> row 3
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// row 2, bytes 0 to 3 = tVal8
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// row 3, bytes 0 to 3 = tVal9
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal8, [pDst],  dstStepx2           ;// row 2, bytes 4 to 7 = tVal8; pDst  -> row 4
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal9, [pDst2], dstStepx2           ;// row 3, bytes 4 to 7 = tVal9; pDst2 -> row 5
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal6, [pSrcLeft], +leftStepx2      ;// tVal6 = left pixel of row 4
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal7, [pSrcLeft2],+leftStepx2      ;// tVal7 = left pixel of row 5
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft], +leftStepx2      ;// tVal8 = left pixel of row 6
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2],+leftStepx2      ;// tVal9 = left pixel of row 7
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// row 4, bytes 0 to 3 = tVal6
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// row 5, bytes 0 to 3 = tVal7
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal6, [pDst],  dstStepx2           ;// row 4, bytes 4 to 7 = tVal6; pDst  -> row 6
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal7, [pDst2], dstStepx2           ;// row 5, bytes 4 to 7 = tVal7; pDst2 -> row 7
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// row 6, bytes 0 to 3 = tVal8
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// row 7, bytes 0 to 3 = tVal9
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal8, [pDst],  dstStepx2           ;// row 6, bytes 4 to 7 = tVal8
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal9, [pDst2], dstStepx2           ;// row 7, bytes 4 to 7 = tVal9
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong
3650c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_VERT
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Vertical prediction: the 8 bytes at pSrcAbove are loaded once into a
        ;// register pair and the same pair is stored to 8 consecutive rows.
        ;// NOTE(review): M_STRD is a macro defined outside this chunk; it reads as
        ;// a doubleword store that post-increments pDst by dstStep - confirm.
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDMIA    pSrcAbove, {tVal6,tVal7}            ;// tVal 6 to 7 = pSrcAbove[0 to 7]
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// status is set before the stores are issued
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[0 to 7]   = tVal 6 to 7
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[8 to 15]  = tVal 6 to 7
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[16 to 23] = tVal 6 to 7
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[24 to 31] = tVal 6 to 7
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[32 to 39] = tVal 6 to 7
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[40 to 47] = tVal 6 to 7
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[48 to 55] = tVal 6 to 7
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// pDst[56 to 63] = tVal 6 to 7
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT                                       ;// Macro to exit midway - break from the case
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong
3830c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_CHROMA_PLANE
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Plane prediction. With A[] = pSrcAbove, L[k] = pSrcLeft[k*leftStep]
        ;// and AL = pSrcAboveLeft[0], the code below computes
        ;//   a = 16 * (A[7] + L[7])
        ;//   H = 4*(A[7]-AL) + 3*(A[6]-A[0]) + 2*(A[5]-A[1]) + (A[4]-A[2])
        ;//   V = 4*(L[7]-AL) + 3*(L[6]-L[0]) + 2*(L[5]-L[1]) + (L[4]-L[2])
        ;//   b = (17*H + 16) >> 5,   c = (17*V + 16) >> 5
        ;// The first row starts at p0 = a + 16 - 3*b - 3*c and steps by b per
        ;// pixel; the loop then adds c to all eight running sums per row.
        ;// Each output byte is the running sum clamped to 13 unsigned bits and
        ;// shifted right by 5. The running sums are kept as signed halfword
        ;// pairs (p2p0, p3p1, p6p4, p7p5) so SADD16/USAT16 process two pixels
        ;// per instruction.
        ;// NOTE(review): M_LDRB/M_STR are macros defined outside this chunk; the
        ;// operands read as post-indexed register-offset accesses - confirm.
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal14, leftStep, leftStep, LSL #3  ;// 7*leftStep
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal7, [pSrcAbove, #+7]             ;// pSrcAbove[7]
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal6, [pSrcLeft, +tVal14]          ;// pSrcLeft[7*leftStep]
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal8, [pSrcAboveLeft]              ;// pSrcAboveLeft[0]
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal9, [pSrcAbove, #+6 ]            ;// pSrcAbove[6]
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal10,[pSrcAbove]                  ;// pSrcAbove[0]
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal7, tVal6                 ;// pSrcAbove[7] + pSrcLeft[7*leftStep]
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal6, tVal6, tVal8                 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0]
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7, tVal7, tVal8                 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0]
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        LSL      tVal2, tVal2, #4                    ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal2, #16                   ;// a + 16
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal9, tVal9,tVal10                 ;// pSrcAbove[6] - pSrcAbove[0]
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal8, [pSrcAbove,#+5]              ;// pSrcAbove[5]
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal10,[pSrcAbove,#+1]              ;// pSrcAbove[1]
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal9, tVal9, tVal9, LSL #1         ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0])
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal9, tVal7, LSL #2         ;// H = H1 + 4*H0
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal8, tVal8, tVal10                ;// pSrcAbove[5] - pSrcAbove[1]
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal9, [pSrcAbove,#+4]              ;// pSrcAbove[4]
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal10,[pSrcAbove,#+2]              ;// pSrcAbove[2]
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal7, tVal8, LSL #1         ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1])
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal11, tVal14,leftStep             ;// 6*leftStep
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal11, pSrcLeft, tVal11            ;// pSrcLeft + 6*leftStep (walks upwards)
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal12, pSrcLeft                    ;// pSrcLeft (walks downwards)
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal9, tVal9, tVal10                ;// pSrcAbove[4] - pSrcAbove[2]
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal7, tVal9                 ;// H = H + (pSrcAbove[4] - pSrcAbove[2])
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[6*leftStep]
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[0]
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal7, tVal7, LSL #4         ;// 17 * H
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal7, #16                   ;// 17 * H + 16
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal8, tVal8, tVal10                ;// pSrcLeft[6*leftStep] - pSrcLeft[0]
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR      b, tVal7, #5                        ;// b = (17 * H + 16) >> 5
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal8, LSL #1         ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0])
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal8, tVal6, LSL #2         ;// V = V1 + 4*V0
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[5*leftStep]
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[leftStep]
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, b, b, LSL #1                 ;// 3*b
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal2, tVal2, tVal7                 ;// a + 16 - 3*b
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7, tVal8, tVal10                ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep]
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[4*leftStep]
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[2*leftStep]
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal6, tVal7, LSL #1         ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep])
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x00FF00FF, =MASK_CONST            ;// r0x00FF00FF = 0x00FF00FF
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7, tVal8, tVal10                ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal6, tVal7                 ;// V = V + V3
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      dstStep, dstStep, #4                ;// dstStep - 4: the first store of each row already steps 4
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal6, tVal6, LSL #4         ;// 17*V
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal6, #16                   ;// 17*V + 16
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR      c, tVal6, #5                        ;// c = (17*V + 16)>>5
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, c, c, LSL #1                 ;// 3*c
4420c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     c, c                                ;// only in half word
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal6, tVal2, tVal6                 ;// p0 = a - 3*b - 3*c + 16
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      c, c, c, LSL #16                    ;// {c,c}
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, b, b                         ;// 2b
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal6, tVal7                 ;// p2 = p0 + 2*b
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal7, b                     ;// 3b
        ;// p0 is a full 32-bit signed value and may be negative. PKHBT takes only
        ;// its low halfword, so its sign-extension bits cannot leak into the p2
        ;// halfword (a plain ORR would force p2 to 0xFFFF whenever p0 < 0).
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    p2p0,   tVal6,  tVal2,  LSL #16     ;// p2p0   = pack {p2, p0}
4490c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     b, b
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal7, tVal7
4510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      b, b, b, LSL #16                    ;// {b,b}
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal7, tVal7, tVal7, LSL #16        ;// {3b,3b}
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p3p1,   p2p0, b                     ;// p3p1   = p2p0 + {b,b}
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p6p4,   p3p1, tVal7                 ;// p6p4   = p3p1 + {3b,3b}
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p7p5,   p6p4, b                     ;// p7p5   = p6p4 + {b,b}
4560c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      outerCount, #BLK_SIZE               ;// Outer Loop Count
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong
4580c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_PLANE
4590c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Saturate into the pp* scratch registers and leave the running sums
        ;// p7p5..p2p0 unclamped. If the sums were clamped in place, the "+ c"
        ;// below would continue from the clamp limit instead of the true plane
        ;// value, and every row after the first clamped one would be wrong.
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   pp7pp5, #13, p7p5                    ;// clip13(p7) clip13(p5)
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   pp6pp4, #13, p6p4                    ;// clip13(p6) clip13(p4)
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   pp3pp1, #13, p3p1                    ;// clip13(p3) clip13(p1)
4630c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   pp2pp0, #13, p2p0                    ;// clip13(p2) clip13(p0)
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      pp7pp5, r0x00FF00FF, pp7pp5, ASR #5  ;// clip8(p7) clip8(p5)
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      pp6pp4, r0x00FF00FF, pp6pp4, ASR #5  ;// clip8(p6) clip8(p4)
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      pp3pp1, r0x00FF00FF, pp3pp1, ASR #5  ;// clip8(p3) clip8(p1)
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      pp2pp0, r0x00FF00FF, pp2pp0, ASR #5  ;// clip8(p2) clip8(p0)
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     outerCount, outerCount, #1           ;// outerCount--; flags consumed by BNE below
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      p3210, pp2pp0, pp3pp1, LSL #8        ;// pack {p3,p2, p1, p0}
4730c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      p3210, [pDst], #4                    ;// store {pDst[0] to pDst[3]}
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      p7654, pp6pp4, pp7pp5, LSL #8        ;// pack {p7,p6, p5, p4}
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    p7654, [pDst], dstStep               ;// store {pDst[4] to pDst[7]}; pDst -> next row
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p7p5,   p7p5,   c                    ;// {p7 + c}, {p5 + c}
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p6p4,   p6p4,   c                    ;// {p6 + c}, {p4 + c}
4800c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p3p1,   p3p1,   c                    ;// {p3 + c}, {p1 + c}
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p2p0,   p2p0,   c                    ;// {p2 + c}, {p0 + c}
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_PLANE                           ;// Loop until outerCount reaches 0 (BLK_SIZE rows)
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// ARM1136JS
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong        END
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntraChroma_8x8 ends
4940c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
495