10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
40c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
50c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
120c1bc742181ded4930842b46e9507372f0b1b963James Dong
130c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
140c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
150c1bc742181ded4930842b46e9507372f0b1b963James Dong
160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
170c1bc742181ded4930842b46e9507372f0b1b963James Dong
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Jump table that implements a C-style switch in assembly
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// by the method of two levels of indexing.
;//
;// Each DCD entry is the address of one prediction-mode code label.
;// The function loads pc directly from table[predMode] (word index),
;// so entry 0 selects the VERT code, 1 the HOR code, 2 the DC code
;// and 3 the PLANE code. The entry order must not be changed.
;// NOTE(review): M_TABLE is defined outside this file (presumably in
;// armCOMM_s.h); its exact effect is not visible here - confirm.
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
220c1bc742181ded4930842b46e9507372f0b1b963James Dong
230c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_pIndexTable16x16
240c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_16X16_VERT, OMX_VC_16X16_HOR            ;// index 0, index 1
250c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE          ;// index 2, index 3
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
280c1bc742181ded4930842b46e9507372f0b1b963James Dong
290c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
300c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Constants
;//
;// BLK_SIZE is used as the row counter start value of the store loops.
;// MUL_CONST0 is the multiplier that copies one byte into all four
;// bytes of a word (byte * 0x01010101).
;// NOTE(review): MUL_CONST1..3 and MASK_CONST are not referenced in the
;// part of the file visible to this review; presumably they belong to
;// the PLANE case - confirm before changing them.
310c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
320c1bc742181ded4930842b46e9507372f0b1b963James DongBLK_SIZE        EQU 0x10                ;// 16 rows in the block
330c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST0      EQU 0x01010101          ;// byte replication multiplier
340c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST1      EQU 0x00060004          ;// packed halfwords: high = 6, low = 4
350c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST2      EQU 0x00070005          ;// packed halfwords: high = 7, low = 5
360c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST3      EQU 0x00030001          ;// packed halfwords: high = 3, low = 1
370c1bc742181ded4930842b46e9507372f0b1b963James DongMASK_CONST      EQU 0x00FF00FF          ;// low byte of each halfword
380c1bc742181ded4930842b46e9507372f0b1b963James Dong
390c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
400c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Scratch variable
;//
;// These RN directives only give names to physical registers; several
;// names map to the SAME register, so writing one alias destroys the
;// value held under every other alias of that register. Examples:
;//   r12 : y, temp2, cMul2, count, tVal12, b
;//   r11 : cMul1, dstStepx2, r0x00FF00FF, tVal11
;//   r10 : temp1, r0x01010101, tVal10, p3210/p7654/p111098/p15141312
;//   r6  : sum, tVal6, p10p8 (and the input alias predMode)
;//   r14 : leftStepx2, tVal14, c
;// There is no tVal13 alias (r13 is the ARM stack pointer).
;// NOTE(review): r14 is the ARM link register and is used here as a
;// scratch register; this presumably relies on M_START/M_EXIT saving
;// and restoring the return address - confirm in armCOMM_s.h.
410c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
420c1bc742181ded4930842b46e9507372f0b1b963James Dongy               RN 12                   ;// row counter of the store loops
430c1bc742181ded4930842b46e9507372f0b1b963James Dongpc              RN 15                   ;// program counter, loaded by the dispatch
440c1bc742181ded4930842b46e9507372f0b1b963James Dong
450c1bc742181ded4930842b46e9507372f0b1b963James Dongreturn          RN 0                    ;// status value returned in r0
460c1bc742181ded4930842b46e9507372f0b1b963James DonginnerCount      RN 0                    ;// not referenced in the visible cases
470c1bc742181ded4930842b46e9507372f0b1b963James DongouterCount      RN 1                    ;// not referenced in the visible cases
480c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft2       RN 1                    ;// DC: second left pointer (odd rows)
490c1bc742181ded4930842b46e9507372f0b1b963James DongpDst2           RN 2                    ;// second destination pointer (odd rows)
500c1bc742181ded4930842b46e9507372f0b1b963James Dongsum             RN 6                    ;// DC: pixel sum, then the DC value
510c1bc742181ded4930842b46e9507372f0b1b963James DongpTable          RN 9                    ;// base address of the jump table
520c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp1           RN 10                   ;// not referenced in the visible cases
530c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp2           RN 12                   ;// not referenced in the visible cases
540c1bc742181ded4930842b46e9507372f0b1b963James DongcMul1           RN 11                   ;// not referenced in the visible cases
550c1bc742181ded4930842b46e9507372f0b1b963James DongcMul2           RN 12                   ;// not referenced in the visible cases
560c1bc742181ded4930842b46e9507372f0b1b963James Dongcount           RN 12                   ;// DC: number of available neighbours (0..2)
570c1bc742181ded4930842b46e9507372f0b1b963James DongdstStepx2       RN 11                   ;// 2 * dstStep (HOR: minus 12)
580c1bc742181ded4930842b46e9507372f0b1b963James DongleftStepx2      RN 14                   ;// DC: 2 * leftStep
590c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x01010101     RN 10                   ;// HOR: holds MUL_CONST0
600c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00FF00FF     RN 11                   ;// not referenced in the visible cases
610c1bc742181ded4930842b46e9507372f0b1b963James Dong
620c1bc742181ded4930842b46e9507372f0b1b963James DongtVal0           RN 0                    ;// tValN: generic temporary living in rN
630c1bc742181ded4930842b46e9507372f0b1b963James DongtVal1           RN 1
640c1bc742181ded4930842b46e9507372f0b1b963James DongtVal2           RN 2
650c1bc742181ded4930842b46e9507372f0b1b963James DongtVal3           RN 3
660c1bc742181ded4930842b46e9507372f0b1b963James DongtVal4           RN 4
670c1bc742181ded4930842b46e9507372f0b1b963James DongtVal5           RN 5
680c1bc742181ded4930842b46e9507372f0b1b963James DongtVal6           RN 6
690c1bc742181ded4930842b46e9507372f0b1b963James DongtVal7           RN 7
700c1bc742181ded4930842b46e9507372f0b1b963James DongtVal8           RN 8
710c1bc742181ded4930842b46e9507372f0b1b963James DongtVal9           RN 9
720c1bc742181ded4930842b46e9507372f0b1b963James DongtVal10          RN 10
730c1bc742181ded4930842b46e9507372f0b1b963James DongtVal11          RN 11
740c1bc742181ded4930842b46e9507372f0b1b963James DongtVal12          RN 12
750c1bc742181ded4930842b46e9507372f0b1b963James DongtVal14          RN 14
760c1bc742181ded4930842b46e9507372f0b1b963James Dong
770c1bc742181ded4930842b46e9507372f0b1b963James Dongb               RN 12                   ;// b, c: not referenced in the visible cases
780c1bc742181ded4930842b46e9507372f0b1b963James Dongc               RN 14                   ;//       (presumably PLANE - confirm)
790c1bc742181ded4930842b46e9507372f0b1b963James Dong
800c1bc742181ded4930842b46e9507372f0b1b963James Dongp2p0            RN 0                    ;// pNpM / pNNNN names: not referenced in the
810c1bc742181ded4930842b46e9507372f0b1b963James Dongp3p1            RN 1                    ;// visible cases (presumably packed pixel
820c1bc742181ded4930842b46e9507372f0b1b963James Dongp6p4            RN 2                    ;// values of the PLANE case - confirm)
830c1bc742181ded4930842b46e9507372f0b1b963James Dongp7p5            RN 4
840c1bc742181ded4930842b46e9507372f0b1b963James Dongp10p8           RN 6
850c1bc742181ded4930842b46e9507372f0b1b963James Dongp11p9           RN 7
860c1bc742181ded4930842b46e9507372f0b1b963James Dongp14p12          RN 8
870c1bc742181ded4930842b46e9507372f0b1b963James Dongp15p13          RN 9
880c1bc742181ded4930842b46e9507372f0b1b963James Dong
890c1bc742181ded4930842b46e9507372f0b1b963James Dongp3210           RN 10                   ;// all four names below share r10
900c1bc742181ded4930842b46e9507372f0b1b963James Dongp7654           RN 10
910c1bc742181ded4930842b46e9507372f0b1b963James Dongp111098         RN 10
920c1bc742181ded4930842b46e9507372f0b1b963James Dongp15141312       RN 10
930c1bc742181ded4930842b46e9507372f0b1b963James Dong
940c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
950c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
;//
;// r0-r3 are used as received on entry (the code never loads them).
;// r4-r7 are filled by the function itself from its four stack
;// arguments (LeftStep, DstStep, PredMode, Availability).
;// These registers are also reachable through scratch aliases
;// (tVal0..tVal7, sum, pDst2, ...), so an input is only valid until
;// the first write through any alias of the same register.
960c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
970c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft        RN 0    ;// in: left neighbour column, one byte per row, rows leftStep apart
980c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAbove       RN 1    ;// in: the 16 bytes of the row above the block
990c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAboveLeft   RN 2    ;// in: pointer to the above-left byte (read by the PLANE case)
1000c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 3    ;// out: destination block, rows dstStep bytes apart
1010c1bc742181ded4930842b46e9507372f0b1b963James DongleftStep        RN 4    ;// in: byte distance between consecutive pSrcLeft rows
1020c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 5    ;// in: byte distance between consecutive pDst rows
1030c1bc742181ded4930842b46e9507372f0b1b963James DongpredMode        RN 6    ;// in: prediction mode, used as index into the jump table
1040c1bc742181ded4930842b46e9507372f0b1b963James Dongavailability    RN 7    ;// in: bit flags tested against OMX_VC_UPPER / OMX_VC_LEFT
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_16x16 starts
;//
;// Entry sequence: loads the four stack arguments into r4-r7, sets the
;// row counter y to BLK_SIZE and jumps to the code for the requested
;// prediction mode through armVCM4P10_pIndexTable16x16.
;// The VERT, HOR and DC cases each finish by putting OMX_Sts_NoErr in
;// r0 and executing M_EXIT.
;//
;// NOTE(review): predMode is used as a table index without any range
;// check. The table has four entries, so a value outside 0..3 loads an
;// unrelated word into pc. Presumably callers guarantee a valid mode -
;// confirm.
;// NOTE(review): M_START/M_ARG/M_LDR/M_EXIT are defined outside this
;// file; "r11" presumably tells M_START which callee-saved registers
;// to preserve - confirm in armCOMM_s.h.
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Write function header
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_PredictIntra_16x16, r11
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    LeftStep,     4
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    DstStep,      4
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    PredMode,     4
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    Availability, 4
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pTable,=armVCM4P10_pIndexTable16x16 ;// pTable = address of the jump table
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load argument from the stack
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    predMode, PredMode                  ;// r6 = predMode (stack argument)
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    leftStep, LeftStep                  ;// r4 = leftStep (stack argument)
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    dstStep,  DstStep                   ;// r5 = dstStep (stack argument)
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    availability, Availability          ;// r7 = availability (stack argument)
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #BLK_SIZE                        ;// y = 16 rows; HOR and DC set y again themselves
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pc, [pTable, predMode, LSL #2]      ;// pc = table[predMode]: jump to the case for predMode
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong
1320c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_VERT
        ;// Vertical prediction: the 16 bytes at pSrcAbove are copied into
        ;// each of the 16 destination rows. Two rows are written per loop
        ;// pass (pDst = even rows, pDst2 = odd rows), so the loop runs 8
        ;// times with y counting 16, 14, ..., 2.
        ;// Clobbers r2 (pDst2), r6-r9, r11 (dstStepx2), r12 (y), pDst, flags.
        ;// NOTE(review): LDM/STM transfer whole words, so pSrcAbove and
        ;// every destination row are presumably required to be
        ;// word-aligned - confirm against the API contract.
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM      pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal6..tVal9 = pSrcAbove[0 to 15]
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (row 1)
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2                       ;// Stall outside the loop
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong
1390c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_VERT
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst, {tVal6,tVal7,tVal8,tVal9}     ;// even row: pDst[0 to 15] = above row
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #2                            ;// y -= 2 (two rows per pass); Z flag feeds the BNE
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst, pDst, dstStepx2               ;// pDst advanced by two rows
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// odd row: pDst2[0 to 15] = above row
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst2, dstStepx2             ;// pDst2 advanced by two rows
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_VERT                           ;// Loop for 8 times
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = success status
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong
1500c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_HOR
        ;// Horizontal prediction: every destination row is filled with the
        ;// left neighbour byte of that row. One byte is read per row from
        ;// pSrcLeft (pointer stepped by leftStep after each read), the
        ;// byte is copied into all four bytes of a word by multiplying
        ;// with 0x01010101, and that word is stored four times.
        ;//
        ;// The loop runs 4 times and produces 4 rows per pass. In pass k
        ;// (k = 0..3): tVal6 = row 4k, tVal7 = row 4k+1, tVal8 = row 4k+2,
        ;// tVal9 = row 4k+3. pDst writes the even rows, pDst2 the odd rows.
        ;// The bytes for rows 4k+4 and 4k+5 are loaded at the end of
        ;// pass k, interleaved with the last stores.
        ;//
        ;// dstStepx2 = 2*dstStep - 12: the first three stores of a row
        ;// post-increment the pointer by 4 each (12 in total), so the
        ;// fourth store's post-increment of dstStepx2 lands exactly two
        ;// rows below the start of the current row.
        ;//
        ;// The BNE at the end uses the flags set by the SUBS near the top
        ;// of the loop; nothing in between may change the flags.
        ;//
        ;// NOTE(review): the two loads at the end of the last pass read
        ;// pSrcLeft at rows 16 and 17 (18 byte loads in total for 16
        ;// rows). The values are never used, but the memory is accessed -
        ;// confirm that callers tolerate this read past the last row.
        ;// NOTE(review): STR stores whole words, so destination rows are
        ;// presumably required to be word-aligned - confirm.
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=6
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101, =MUL_CONST0            ;// Const to repeat the byte in reg 4 times
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #4                               ;// 4 passes of 4 rows each
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal6, [pSrcLeft], +leftStep        ;// tVal6 = left byte of row 0
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (row 1)
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal7, [pSrcLeft], +leftStep        ;// tVal7 = left byte of row 1
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      dstStepx2, dstStepx2, #12           ;// minus the 12 bytes already stepped within a row
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong
1620c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_HOR
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft], +leftStep        ;// tVal8 = left byte of row 4k+2
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft], +leftStep        ;// tVal9 = left byte of row 4k+3
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #1                            ;// y--; flags are consumed by the BNE at loop end
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// row 4k:   store tVal6 at pDst[0 to 3]
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// row 4k+1: store tVal7 at pDst2[0 to 3]
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// row 4k:   store tVal6 at pDst[4 to 7]
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// row 4k+1: store tVal7 at pDst2[4 to 7]
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// row 4k:   store tVal6 at pDst[8 to 11]
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// row 4k+1: store tVal7 at pDst2[8 to 11]
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal6, [pDst], dstStepx2            ;// row 4k:   store at pDst[12 to 15], then pDst -> row 4k+2
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal7, [pDst2], dstStepx2           ;// row 4k+1: store at pDst2[12 to 15], then pDst2 -> row 4k+3
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// row 4k+2: store tVal8 at pDst[0 to 3]
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// row 4k+3: store tVal9 at pDst2[0 to 3]
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// row 4k+2: store tVal8 at pDst[4 to 7]
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// row 4k+3: store tVal9 at pDst2[4 to 7]
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// row 4k+2: store tVal8 at pDst[8 to 11]
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// row 4k+3: store tVal9 at pDst2[8 to 11]
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal8, [pDst], dstStepx2            ;// row 4k+2: store at pDst[12 to 15], then pDst -> row 4k+4
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal6, [pSrcLeft], +leftStep        ;// tVal6 = left byte of row 4k+4 (for the next pass)
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal9, [pDst2], dstStepx2           ;// row 4k+3: store at pDst2[12 to 15], then pDst2 -> row 4k+5
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal7, [pSrcLeft], +leftStep        ;// tVal7 = left byte of row 4k+5 (for the next pass)
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_HOR                            ;// Loop for 4 times (y = 4 down to 1)
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = success status
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
1920c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_DC
        ;// DC prediction: every byte of the 16x16 block is set to one value.
        ;//   neither neighbour available : 128
        ;//   only above or only left     : (sum of its 16 bytes + 8)  >> 4
        ;//   both available              : (sum of all 32 bytes + 16) >> 5
        ;// "Available" means the OMX_VC_UPPER / OMX_VC_LEFT bit is set in
        ;// availability; count holds how many of the two are set.
        ;//
        ;// Register notes (aliases share physical registers):
        ;//  - count is r12, the same register as y, so the y = 16 set at
        ;//    function entry is lost and y is set again before LOOP_DC.
        ;//  - The above-row sum is kept in tVal2 (r2) because sum (r6 =
        ;//    tVal6) is overwritten while the left column is summed.
        ;//  - Loading tVal9 overwrites pTable (r9); writing tVal7
        ;//    overwrites availability (r7) after its last test;
        ;//    pSrcLeft2 (r1) overwrites pSrcAbove after it has been read.
        ;//  - leftStepx2 is r14 (ARM link register), used as scratch.
        ;// NOTE(review): LDM/STM transfer whole words, so pSrcAbove and the
        ;// destination rows are presumably required to be word-aligned -
        ;// confirm against the API contract.
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      count, #0                           ;// count = 0
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_UPPER         ;// if(availability & #OMX_VC_UPPER)
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      TST_LEFT                            ;// Jump to Left if not upper
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM      pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal8..tVal11 = pSrcAbove[0 to 15]
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      count, count, #1                    ;// if upper inc count by 1
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// The 16 bytes are summed as two parallel 16-bit lanes: UXTB16
        ;// widens two bytes of a word into two halfwords, UADD16 adds the
        ;// halfword lanes independently.
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal2, tVal8                        ;// pSrcAbove[0, 2]
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal6, tVal9                        ;// pSrcAbove[4, 6]
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal2, tVal2, tVal6                 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal8, tVal8, tVal9                 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal2, tVal2, tVal8                 ;// two lane sums covering pSrcAbove[0] to pSrcAbove[7]
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal8, tVal10                       ;// pSrcAbove[8, 10]
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal9, tVal11                       ;// pSrcAbove[12, 14]
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal8, tVal8, tVal9                 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal10, tVal10, ROR #8              ;// pSrcAbove[9, 11]
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal11, tVal11, ROR #8              ;// pSrcAbove[13, 15]
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal10, tVal10, tVal11              ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal8, tVal8, tVal10                ;// two lane sums covering pSrcAbove[8] to pSrcAbove[15]
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal2, tVal2, tVal8                 ;// two lane sums covering pSrcAbove[0] to pSrcAbove[15]
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal2, tVal2, LSR #16        ;// low half = high lane + low lane = full sum; high half keeps the old high lane
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum, tVal2                          ;// sum = above-row total (low half); tVal2 still holds it too
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong
2300c1bc742181ded4930842b46e9507372f0b1b963James DongTST_LEFT
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_LEFT          ;// if(availability & #OMX_VC_LEFT)
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      TST_COUNT                           ;// skip the left column if it is not available
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// pSrcLeft walks the even rows, pSrcLeft2 the odd rows, each
        ;// stepping by 2*leftStep after a read. Four groups of four rows.
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte of row 0
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte of row 1
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left byte of row 2
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left byte of row 3
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal9                 ;// tVal7 = row 0 + row 1 (overwrites availability)
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      count, count, #1                    ;// Inc Counter if Left is available
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal10, tVal11               ;// tVal6 = row 2 + row 3 (tVal6 is the sum register)
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte of row 4
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte of row 5
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left byte of row 6
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left byte of row 7
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, tVal7, tVal6                   ;// sum = tVal7 + tVal6 = rows 0..3
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal10                ;// tVal7 = rows 4..7
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte of row 8
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte of row 9
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left byte of row 10
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left byte of row 11
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, sum, tVal7                     ;// sum = rows 0..7
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal10                ;// tVal7 = rows 8..11
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte of row 12
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte of row 13
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left byte of row 14
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left byte of row 15
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, sum, tVal7                     ;// sum = rows 0..11
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal10                ;// tVal7 = rows 12..15
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, sum, tVal7                     ;// sum = left column total (rows 0..15)
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong
2740c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT
        ;// The flags of the second CMP (count vs 1) stay live through
        ;// the conditional ADD/LSR instructions below: EQ = one
        ;// neighbour, NE = two neighbours.
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      count, #0                           ;// if(count == 0)
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOVEQ    sum, #128                           ;// sum = 128 if(count == 0)
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      TST_COUNT0                          ;// if(count == 0)
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      count, #1                           ;// if(count == 1)
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDEQ    sum, sum, #8                        ;// sum += 8 if(count == 1)
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDNE    sum, sum, tVal2                     ;// sum = sumleft + sumupper (tVal2 high half is not yet cleared)
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDNE    sum, sum, #16                       ;// sum += 16 if(count == 2)
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum, sum                            ;// keep the low halfword; drops the leftover high half of tVal2
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        LSREQ    sum, sum, #4                        ;// sum >> 4 if(count == 1)
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        LSRNE    sum, sum, #5                        ;// sum >> 5 if(count == 2)
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong
2950c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT0
        ;// sum now holds the DC value; copy it into all 16 bytes of
        ;// tVal6..tVal9 and store that to every row, two rows per pass.
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      sum, sum, sum, LSL #8               ;// DC value replicated in the two low bytes
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal6, sum, sum, LSL #16            ;// sum  replicated in all bytes
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        CPY      tVal7, tVal6                        ;// tVal7 = tVal6
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong        CPY      tVal8, tVal6                        ;// tVal8 = tVal6
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        CPY      tVal9, tVal6                        ;// tVal9 = tVal6
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (row 1)
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #BLK_SIZE                        ;// y = 16 again (r12 was used as count above)
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong
3110c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_DC
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst, {tVal6,tVal7,tVal8,tVal9}     ;// even row: pDst[0 to 15] = DC value
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #2                            ;// y -= 2 (two rows per pass); Z flag feeds the BNE
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst, pDst, dstStepx2               ;// pDst advanced by two rows
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// odd row: pDst2[0 to 15] = DC value
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst2, dstStepx2             ;// pDst2 advanced by two rows
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_DC                             ;// Loop for 8 times
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr              ;// r0 = success status
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong
3220c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_PLANE
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Plane mode: each pixel is ((d + x*b + y*c) >> 5) clamped to [0,255], with d = a + 16 - 7*b - 7*c
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal14, leftStep, leftStep, LSL #4  ;// tVal14 = 15*leftStep
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal10, [pSrcLeft,  tVal14]         ;// tVal10 = pSrcLeft[15*leftStep]
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal11, [pSrcAboveLeft]             ;// tVal11 = pSrcAboveLeft[0]
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal12, [pSrcAbove, #15]            ;// tVal12 = pSrcAbove[15]
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2,  tVal12,  tVal10             ;// tVal2  = pSrcAbove[15] + pSrcLeft[15*leftStep]
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal10, tVal10,  tVal11             ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal11, tVal12,  tVal11             ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal2,  tVal2,   LSL #4             ;// tVal2  = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Accumulate H = 8*H0 + sum over k=1..7 of k * (pSrcAbove[7+k] - pSrcAbove[7-k]) in tVal11
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     tVal11, tVal11, LSL #3              ;// tVal11 = 8 * H0 = 8 * (pSrcAbove[15] - pSrcAboveLeft[0])
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #0]              ;// tVal6  = pSrcAbove[0]
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #14]             ;// tVal7  = pSrcAbove[14]
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = pSrcAbove[14] - pSrcAbove[0]
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     tVal8, tVal8, tVal8, LSL #3         ;// tVal8  = 7 * (pSrcAbove[14] - pSrcAbove[0])
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8               ;// H += tVal8
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #1]              ;// tVal6  = pSrcAbove[1]
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #13]             ;// tVal7  = pSrcAbove[13]
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = pSrcAbove[13] - pSrcAbove[1]
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8                 ;// tVal8  = 2 * (pSrcAbove[13] - pSrcAbove[1])
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8, LSL #1         ;// tVal8  = 6 * (pSrcAbove[13] - pSrcAbove[1])
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8               ;// H += tVal8
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #2]              ;// tVal6  = pSrcAbove[2]
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #12]             ;// tVal7  = pSrcAbove[12]
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = pSrcAbove[12] - pSrcAbove[2]
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8, LSL #2         ;// tVal8  = 5 * (pSrcAbove[12] - pSrcAbove[2])
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8               ;// H += tVal8
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #3]              ;// tVal6  = pSrcAbove[3]
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #11]             ;// tVal7  = pSrcAbove[11]
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = pSrcAbove[11] - pSrcAbove[3]
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8, LSL #2       ;// H += 4 * (pSrcAbove[11] - pSrcAbove[3])
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #4]              ;// tVal6  = pSrcAbove[4]
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #10]             ;// tVal7  = pSrcAbove[10]
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = pSrcAbove[10] - pSrcAbove[4]
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8, LSL #1         ;// tVal8  = 3 * (pSrcAbove[10] - pSrcAbove[4])
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8               ;// H += tVal8
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #5]              ;// tVal6  = pSrcAbove[5]
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #9]              ;// tVal7  = pSrcAbove[9]
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = pSrcAbove[9] - pSrcAbove[5]
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8, LSL #1       ;// H += 2 * (pSrcAbove[9] - pSrcAbove[5])
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #6]              ;// tVal6  = pSrcAbove[6]
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #8]              ;// tVal7  = pSrcAbove[8]
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// tVal8  = 1 * (pSrcAbove[8] - pSrcAbove[6])
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal7, tVal11, tVal8                ;// tVal7  = H (complete)
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2,  tVal2,   #16                ;// tVal2  = a + 16
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal1,  pSrcLeft                    ;// tVal1  = pSrcLeft
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal9,  tVal14,   leftStep          ;// tVal9  = 14*leftStep
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal9,  pSrcLeft, tVal9             ;// tVal9  = pSrcLeft + 14*leftStep
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Accumulate V in tVal6 from the left column: tVal9 steps by -leftStep from row 14, tVal1 by +leftStep from row 0
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[14*leftStep]
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [tVal1], +leftStep          ;// tVal11 = pSrcLeft[0]
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,  tVal7,  LSL #2      ;// tVal7  = 5 * H
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,  #32                 ;// tVal7  = 5 * H + 32
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal8,  tVal8,  tVal11              ;// tVal8  = pSrcLeft[14*leftStep] - pSrcLeft[0]
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR      tVal12, tVal7,  #6                  ;// tVal12 = b = (5 * H + 32) >> 6
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal8,  tVal8,  tVal8,  LSL #3      ;// tVal8  = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal8,  tVal10, LSL #3      ;// tVal6  = V = 8*V0 + V1
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[13*leftStep]
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[leftStep]
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal7,  tVal12,  tVal12,  LSL #3    ;// tVal7  = 7*b
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal2,  tVal2,   tVal7              ;// tVal2  = a + 16 - 7*b
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[12*leftStep]
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7              ;// tVal7  = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[2*leftStep]
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7,  LSL #1     ;// tVal7  = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V2
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[11*leftStep]
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[3*leftStep]
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7,  LSL #2     ;// tVal7  = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V3
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[10*leftStep]
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[4*leftStep]
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7,  LSL #2     ;// tVal6  = V = V + V4
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      dstStep, dstStep, #16               ;// dstStep -= 16: the four post-incremented STRs per row already advance pDst by 16
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[9*leftStep]
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[5*leftStep]
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7,  LSL #1     ;// tVal7  = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V5
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[8*leftStep]
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[6*leftStep]
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7,  LSL #1     ;// tVal6  = V = V + V6
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V7
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal6,  LSL #2     ;// tVal6  = 5*V
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   #32                ;// tVal6  = 5*V + 32
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR      tVal14, tVal6,   #6                 ;// tVal14 = c = (5*V + 32)>>6
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal6,  tVal14,  tVal14, LSL #3     ;// tVal6  = 7*c
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal14, tVal14                      ;// tVal14 = c with the upper halfword cleared
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal12,  tVal12             ;// tVal10 = 2*b
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal14, tVal14,  tVal14, LSL #16    ;// tVal14 = {c  ,  c}
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal6,  tVal2,   tVal6              ;// tVal6  = d = a - 7*b - 7*c + 16 (signed, may be negative)
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal1,  tVal6,   tVal10             ;// tVal1  = p2 = d + 2*b
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10,  tVal12             ;// tVal10 = 3*b
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    tVal0,  tVal6,   tVal1,  LSL #16    ;// tVal0  = p2p0 = {p2, p0}; only d[15:0] is taken, so a negative d cannot smear into p2
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal12, tVal12                      ;// tVal12 = b with the upper halfword cleared
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal10, tVal10                      ;// tVal10 = 3*b with the upper halfword cleared
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal12, tVal12,  tVal12, LSL #16    ;// tVal12 = {b  ,  b}
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal10, tVal10,  tVal10, LSL #16    ;// tVal10 = {3b , 3b}
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal1,  tVal0,   tVal12             ;// tVal1  = p3p1   = p2p0   + {b,b}
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal2,  tVal1,   tVal10             ;// tVal2  = p6p4   = p3p1   + {3b,3b}
4420c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal4,  tVal2,   tVal12             ;// tVal4  = p7p5   = p6p4   + {b,b}
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal6,  tVal4,   tVal10             ;// tVal6  = p10p8  = p7p5   + {3b,3b}
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal7,  tVal6,   tVal12             ;// tVal7  = p11p9  = p10p8  + {b,b}
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal8,  tVal7,   tVal10             ;// tVal8  = p14p12 = p11p9  + {3b,3b}
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal9,  tVal8,   tVal12             ;// tVal9  = p15p13 = p14p12 + {b,b}
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x00FF00FF,     =MASK_CONST        ;// r0x00FF00FF = 0x00FF00FF
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// NOTE(review): pNpM, c, temp1 and temp2 are register aliases declared outside this section; c presumably names tVal14 = {c, c}
4490c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_PLANE
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// One pass = one 16-pixel row: saturate, scale and store the current row while stepping each accumulator by {c, c}
4510c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p3p1                    ;// temp2 = {p3, p1}, each halfword saturated to [0, 8191]
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p2p0                    ;// temp1 = {p2, p0}, each halfword saturated to [0, 8191]
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p3p1,   p3p1,   c                   ;// p3p1 += {c, c} for the next row
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p2p0,   p2p0,   c                   ;// p2p0 += {c, c} for the next row
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5   ;// temp2 = (p3 >> 5) << 16 | (p1 >> 5); mask drops bits shifted across lanes
4560c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5   ;// temp1 = (p2 >> 5) << 16 | (p0 >> 5)
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8         ;// temp1 = p0 | p1 << 8 | p2 << 16 | p3 << 24 (final 8-bit pixels)
4580c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                   ;// store pixels 0..3, pDst += 4
4590c1bc742181ded4930842b46e9507372f0b1b963James Dong
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p7p5                    ;// temp2 = {p7, p5} saturated to [0, 8191]
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p6p4                    ;// temp1 = {p6, p4} saturated to [0, 8191]
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p7p5,   p7p5,   c                   ;// p7p5 += {c, c}
4630c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p6p4,   p6p4,   c                   ;// p6p4 += {c, c}
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5   ;// temp2 = (p7 >> 5) << 16 | (p5 >> 5)
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5   ;// temp1 = (p6 >> 5) << 16 | (p4 >> 5)
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8         ;// temp1 = p4 | p5 << 8 | p6 << 16 | p7 << 24
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                   ;// store pixels 4..7, pDst += 4
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p11p9                   ;// temp2 = {p11, p9} saturated to [0, 8191]
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p10p8                   ;// temp1 = {p10, p8} saturated to [0, 8191]
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p11p9,  p11p9,  c                   ;// p11p9 += {c, c}
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p10p8,  p10p8,  c                   ;// p10p8 += {c, c}
4730c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5   ;// temp2 = (p11 >> 5) << 16 | (p9 >> 5)
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5   ;// temp1 = (p10 >> 5) << 16 | (p8 >> 5)
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8         ;// temp1 = p8 | p9 << 8 | p10 << 16 | p11 << 24
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                   ;// store pixels 8..11, pDst += 4
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p15p13                  ;// temp2 = {p15, p13} saturated to [0, 8191]
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p14p12                  ;// temp1 = {p14, p12} saturated to [0, 8191]
4800c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p15p13, p15p13, c                   ;// p15p13 += {c, c}
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p14p12, p14p12, c                   ;// p14p12 += {c, c}
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5   ;// temp2 = (p15 >> 5) << 16 | (p13 >> 5)
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5   ;// temp1 = (p14 >> 5) << 16 | (p12 >> 5)
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8         ;// temp1 = p12 | p13 << 8 | p14 << 16 | p15 << 24
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                   ;// store pixels 12..15, pDst += 4
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDS     r0x00FF00FF, r0x00FF00FF, #1<<28     ;// Loop counter in top 4 bits of the mask; carry sets on the 16th add. Saturated>>5 values never reach bits 24+, so the AND results are unaffected
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst, pDst, dstStep                  ;// pDst += dstStep - 16 (dstStep was reduced by 16 above) -> start of next row
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong        BCC      LOOP_PLANE                           ;// Loop for 16 times (the ADD above does not touch the ADDS flags)
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
4940c1bc742181ded4930842b46e9507372f0b1b963James Dong
4950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// ARM1136JS
4960c1bc742181ded4930842b46e9507372f0b1b963James Dong
4970c1bc742181ded4930842b46e9507372f0b1b963James Dong
4980c1bc742181ded4930842b46e9507372f0b1b963James Dong        END
4990c1bc742181ded4930842b46e9507372f0b1b963James Dong;-----------------------------------------------------------------------------------------------
5000c1bc742181ded4930842b46e9507372f0b1b963James Dong; omxVCM4P10_PredictIntra_16x16 ends
5010c1bc742181ded4930842b46e9507372f0b1b963James Dong;-----------------------------------------------------------------------------------------------
502