10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This table is for implementing the switch case of C in asm by
340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the method of two levels of indexing.
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
360c1bc742181ded4930842b46e9507372f0b1b963James Dong
370c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_pIndexTable16x16    ;// jump table: one word-sized handler address per predMode value
380c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_16X16_VERT, OMX_VC_16X16_HOR    ;// entry 0 = vertical handler, entry 1 = horizontal handler
390c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE  ;// entry 2 = DC handler, entry 3 = plane handler; the dispatch (LDR pc, [pTable, predMode, LSL #2]) does no range check, so predMode must be 0..3
400c1bc742181ded4930842b46e9507372f0b1b963James Dong
410c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
420c1bc742181ded4930842b46e9507372f0b1b963James Dong
430c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
440c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Constants
450c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
460c1bc742181ded4930842b46e9507372f0b1b963James DongBLK_SIZE        EQU 0x10    ;// 16: initial value of the row counter y (VERT and DC loops step it down by 2 per pass)
470c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST0      EQU 0x01010101    ;// multiplying a byte value by this replicates it into all four bytes of a word (HOR mode)
480c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST1      EQU 0x00060004    ;// halfword pair 6:4 - not referenced in the visible part of this file; presumably plane-mode weights, TODO confirm
490c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST2      EQU 0x00070005    ;// halfword pair 7:5 - same caveat as MUL_CONST1
500c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST3      EQU 0x00030001    ;// halfword pair 3:1 - same caveat as MUL_CONST1
510c1bc742181ded4930842b46e9507372f0b1b963James DongMASK_CONST      EQU 0x00FF00FF    ;// bit pattern keeps the low byte of each halfword; its use is not visible in this part of the file
520c1bc742181ded4930842b46e9507372f0b1b963James Dong
530c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
540c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Scratch variable
550c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
560c1bc742181ded4930842b46e9507372f0b1b963James Dongy               RN 12    ;// row/loop counter; r12 is also temp2, cMul2, count, tVal12 and b
570c1bc742181ded4930842b46e9507372f0b1b963James Dongpc              RN 15    ;// program counter; loaded from the index table to dispatch on predMode
580c1bc742181ded4930842b46e9507372f0b1b963James Dong
590c1bc742181ded4930842b46e9507372f0b1b963James Dongreturn          RN 0    ;// status code register, set to OMX_Sts_NoErr before M_EXIT (same register as pSrcLeft)
600c1bc742181ded4930842b46e9507372f0b1b963James DonginnerCount      RN 0    ;// not referenced in the visible part of this file
610c1bc742181ded4930842b46e9507372f0b1b963James DongouterCount      RN 1    ;// not referenced in the visible part of this file
620c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft2       RN 1    ;// DC mode: pSrcLeft + leftStep; reuses r1, so pSrcAbove must already have been read
630c1bc742181ded4930842b46e9507372f0b1b963James DongpDst2           RN 2    ;// pDst + dstStep (second output row pointer); reuses r2 = pSrcAboveLeft
640c1bc742181ded4930842b46e9507372f0b1b963James Dongsum             RN 6    ;// DC mode pixel sum; same register as predMode and tVal6
650c1bc742181ded4930842b46e9507372f0b1b963James DongpTable          RN 9    ;// address of armVCM4P10_pIndexTable16x16; dead after the dispatch, r9 is reused as tVal9
660c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp1           RN 10
670c1bc742181ded4930842b46e9507372f0b1b963James Dongtemp2           RN 12
680c1bc742181ded4930842b46e9507372f0b1b963James DongcMul1           RN 11
690c1bc742181ded4930842b46e9507372f0b1b963James DongcMul2           RN 12
700c1bc742181ded4930842b46e9507372f0b1b963James Dongcount           RN 12    ;// DC mode: incremented once for upper and once for left availability (0, 1 or 2)
710c1bc742181ded4930842b46e9507372f0b1b963James DongdstStepx2       RN 11    ;// 2 * dstStep (HOR mode additionally subtracts 12)
720c1bc742181ded4930842b46e9507372f0b1b963James DongleftStepx2      RN 14    ;// 2 * leftStep, DC mode; occupies r14
730c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x01010101     RN 10    ;// holds MUL_CONST0 in HOR mode
740c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00FF00FF     RN 11    ;// by its name meant to hold MASK_CONST; the load is not visible in this part of the file
750c1bc742181ded4930842b46e9507372f0b1b963James Dong
760c1bc742181ded4930842b46e9507372f0b1b963James DongtVal0           RN 0    ;// tValN is a plain alias for register rN; these overlap the input and scratch names defined elsewhere in this file
770c1bc742181ded4930842b46e9507372f0b1b963James DongtVal1           RN 1
780c1bc742181ded4930842b46e9507372f0b1b963James DongtVal2           RN 2    ;// DC mode keeps the upper-row sum here (r2 is also pSrcAboveLeft / pDst2)
790c1bc742181ded4930842b46e9507372f0b1b963James DongtVal3           RN 3
800c1bc742181ded4930842b46e9507372f0b1b963James DongtVal4           RN 4
810c1bc742181ded4930842b46e9507372f0b1b963James DongtVal5           RN 5
820c1bc742181ded4930842b46e9507372f0b1b963James DongtVal6           RN 6    ;// same register as sum and predMode
830c1bc742181ded4930842b46e9507372f0b1b963James DongtVal7           RN 7    ;// same register as availability
840c1bc742181ded4930842b46e9507372f0b1b963James DongtVal8           RN 8
850c1bc742181ded4930842b46e9507372f0b1b963James DongtVal9           RN 9    ;// same register as pTable
860c1bc742181ded4930842b46e9507372f0b1b963James DongtVal10          RN 10
870c1bc742181ded4930842b46e9507372f0b1b963James DongtVal11          RN 11
880c1bc742181ded4930842b46e9507372f0b1b963James DongtVal12          RN 12   ;// same register as y and count
890c1bc742181ded4930842b46e9507372f0b1b963James DongtVal14          RN 14   ;// r14; there is no tVal13 alias
900c1bc742181ded4930842b46e9507372f0b1b963James Dong
910c1bc742181ded4930842b46e9507372f0b1b963James Dongb               RN 12    ;// not referenced in the visible part of this file; presumably a plane-mode coefficient, TODO confirm
920c1bc742181ded4930842b46e9507372f0b1b963James Dongc               RN 14    ;// not referenced in the visible part of this file; presumably a plane-mode coefficient, TODO confirm
930c1bc742181ded4930842b46e9507372f0b1b963James Dong
940c1bc742181ded4930842b46e9507372f0b1b963James Dongp2p0            RN 0    ;// the pXpY names below are not referenced in the visible part of this file; by name, pairs of pixels packed per register - TODO confirm
950c1bc742181ded4930842b46e9507372f0b1b963James Dongp3p1            RN 1
960c1bc742181ded4930842b46e9507372f0b1b963James Dongp6p4            RN 2
970c1bc742181ded4930842b46e9507372f0b1b963James Dongp7p5            RN 4
980c1bc742181ded4930842b46e9507372f0b1b963James Dongp10p8           RN 6
990c1bc742181ded4930842b46e9507372f0b1b963James Dongp11p9           RN 7
1000c1bc742181ded4930842b46e9507372f0b1b963James Dongp14p12          RN 8
1010c1bc742181ded4930842b46e9507372f0b1b963James Dongp15p13          RN 9
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong
1030c1bc742181ded4930842b46e9507372f0b1b963James Dongp3210           RN 10    ;// all four names in this group map to the same register, r10
1040c1bc742181ded4930842b46e9507372f0b1b963James Dongp7654           RN 10
1050c1bc742181ded4930842b46e9507372f0b1b963James Dongp111098         RN 10
1060c1bc742181ded4930842b46e9507372f0b1b963James Dongp15141312       RN 10
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
1110c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft        RN 0    ;// input pointer: left neighbour column, read one byte at a time and advanced by leftStep
1120c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAbove       RN 1    ;// input pointer: row above the block, read as four words with LDM (16 bytes)
1130c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAboveLeft   RN 2    ;// input pointer: a single byte is read from it in PLANE mode
1140c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 3    ;// output pointer: rows are written with word stores (STM/STR), dstStep bytes apart
1150c1bc742181ded4930842b46e9507372f0b1b963James DongleftStep        RN 4    ;// input variable: loaded from stack argument LeftStep
1160c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 5    ;// input variable: loaded from stack argument DstStep
1170c1bc742181ded4930842b46e9507372f0b1b963James DongpredMode        RN 6    ;// input variable: loaded from stack argument PredMode; index into armVCM4P10_pIndexTable16x16
1180c1bc742181ded4930842b46e9507372f0b1b963James Dongavailability    RN 7    ;// input variable: loaded from stack argument Availability; DC mode tests OMX_VC_UPPER and OMX_VC_LEFT
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_16x16 starts
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Write function header
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_PredictIntra_16x16, r11
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    LeftStep,     4
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    DstStep,      4
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    PredMode,     4
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    Availability, 4
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load argument from the stack
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #BLK_SIZE                        ;// Outer Loop Count
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on preMode
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong
1460c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_VERT
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM      pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15]
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// double dstStep
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2- pDst advanced by dstStep
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2                       ;// Stall outside the loop
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong
1530c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_VERT
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst, {tVal6,tVal7,tVal8,tVal9}     ;// pDst[0 to 15] = tVal 6 to 9
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #2                            ;// y--
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst, pDst, dstStepx2               ;// pDst advanced by dstStep
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// pDst2[16 to 31] = tVal 6 to 9
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst2, dstStepx2             ;// pDst advanced by dstStep
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_VERT                           ;// Loop for 8 times
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong
1640c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_HOR
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=6
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x01010101, =MUL_CONST0            ;// Const to repeat the byte in reg 4 times
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #4                               ;// Outer Loop Count
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal6, [pSrcLeft], +leftStep        ;// tVal6 = pSrcLeft[0 to 3]
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2- pDst advanced by dstStep
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal7, [pSrcLeft], +leftStep        ;// tVal1 = pSrcLeft[4 to 7]
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// double dstStep
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      dstStepx2, dstStepx2, #12           ;// double dstStep  minus 12
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong
1760c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_HOR
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft], +leftStep        ;// tVal8 = pSrcLeft[0 to 3]
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft], +leftStep        ;// tVal9 = pSrcLeft[4 to 7]
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #1                            ;// y--
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// store {tVal6} at pDst[0 to 3]
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// store {tVal7} at pDst2[0 to 3]
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// store {tVal6} at pDst[4 to 7]
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// store {tVal7} at pDst2[4 to 7]
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal6, [pDst],  #+4                 ;// store {tVal6} at pDst[8 to 11]
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal7, [pDst2], #+4                 ;// store {tVal7} at pDst2[8 to 11]
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal6, [pDst], dstStepx2            ;// store {tVal6} at pDst[12 to 15]
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal7, [pDst2], dstStepx2           ;// store {tVal7} at pDst2[12 to 15]
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// store {tVal6} at pDst[0 to 3]
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// store {tVal7} at pDst2[0 to 3]
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// store {tVal6} at pDst[4 to 7]
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// store {tVal7} at pDst2[4 to 7]
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal8, [pDst],  #+4                 ;// store {tVal6} at pDst[8 to 11]
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      tVal9, [pDst2], #+4                 ;// store {tVal7} at pDst2[8 to 11]
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal8, [pDst], dstStepx2            ;// store {tVal6} at pDst[12 to 15]
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal6, [pSrcLeft], +leftStep        ;// tVal6 = pSrcLeft[0 to 3]
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR    tVal9, [pDst2], dstStepx2           ;// store {tVal7} at pDst2[12 to 15]
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal7, [pSrcLeft], +leftStep        ;// tVal7 = pSrcLeft[4 to 7]
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_HOR                            ;// Loop for 3 times
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong
2060c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_DC
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      count, #0                           ;// count = 0
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_UPPER         ;// if(availability & #OMX_VC_UPPER)
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      TST_LEFT                            ;// Jump to Left if not upper
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDM      pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15]
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      count, count, #1                    ;// if upper inc count by 1
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal2, tVal8                        ;// pSrcAbove[0, 2]
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal6, tVal9                        ;// pSrcAbove[4, 6]
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal2, tVal2, tVal6                 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal8, tVal8, tVal9                 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal2, tVal2, tVal8                 ;// sum(pSrcAbove[0] to pSrcAbove[7])
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal8, tVal10                       ;// pSrcAbove[8, 10]
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal9, tVal11                       ;// pSrcAbove[12, 14]
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal8, tVal8, tVal9                 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal10, tVal10, ROR #8              ;// pSrcAbove[9, 11]
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTB16   tVal11, tVal11, ROR #8              ;// pSrcAbove[13, 15]
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal10, tVal10, tVal11              ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal8, tVal8, tVal10                ;// sum(pSrcAbove[8] to pSrcAbove[15])
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        UADD16   tVal2, tVal2, tVal8                 ;// sum(pSrcAbove[0] to pSrcAbove[15])
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2, tVal2, tVal2, LSR #16        ;// sum(pSrcAbove[0] to pSrcAbove[15])
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum, tVal2                          ;// Extract the lower half for result
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong
2440c1bc742181ded4930842b46e9507372f0b1b963James DongTST_LEFT
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_LEFT
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      TST_COUNT
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= pSrcLeft[2]
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= pSrcLeft[3]
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal9                 ;// tVal7 = tVal8 + tVal9
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      count, count, #1                    ;// Inc Counter if Left is available
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6, tVal10, tVal11               ;// tVal6 = tVal10 + tVal11
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= pSrcLeft[2]
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= pSrcLeft[3]
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, tVal7, tVal6                   ;// sum = tVal8 + tVal10
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal10                ;// tVal7 = tVal8 + tVal10
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= pSrcLeft[2]
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= pSrcLeft[3]
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, sum, tVal7                     ;// sum = sum + tVal7
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal10                ;// tVal7 = tVal8 + tVal10
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= pSrcLeft[2]
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= pSrcLeft[3]
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, sum, tVal7                     ;// sum = sum + tVal7
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7, tVal8, tVal10                ;// tVal7 = tVal8 + tVal10
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      sum, sum, tVal7                     ;// sum = sum + tVal7
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong
2880c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      count, #0                           ;// if(count == 0)
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOVEQ    sum, #128                           ;// sum = 128 if(count == 0)
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      TST_COUNT0                          ;// if(count == 0)
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      count, #1                           ;// if(count == 1)
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDEQ    sum, sum, #8                        ;// sum += 8 if(count == 1)
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDNE    sum, sum, tVal2                     ;// sum = sumleft + sumupper
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDNE    sum, sum, #16                       ;// sum += 16 if(count == 2)
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     sum, sum                            ;// sum only byte rest cleared
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        LSREQ    sum, sum, #4                        ;// sum >> 4 if(count == 1)
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        LSRNE    sum, sum, #5                        ;// sum >> 5 if(count == 2)
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// DC fill tail. Replicates the value in 'sum' across every byte of four
;// registers, then writes those 16 bytes to two destination rows per loop pass.
;// NOTE(review): the byte replication is only correct if 'sum' already fits in
;// 8 bits on entry - confirm against the code that reaches TST_COUNT0.
3090c1bc742181ded4930842b46e9507372f0b1b963James DongTST_COUNT0
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      sum, sum, sum, LSL #8               ;// sum = sum | (sum << 8): value now in the two low bytes
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal6, sum, sum, LSL #16            ;// tVal6 = value replicated in all four bytes
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        CPY      tVal7, tVal6                        ;// tVal7 = tVal6
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        CPY      tVal8, tVal6                        ;// tVal8 = tVal6
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        CPY      tVal9, tVal6                        ;// tVal9 = tVal6
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (second row of each pair)
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #BLK_SIZE                        ;// y = number of rows still to write
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong
3250c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_DC
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst, {tVal6,tVal7,tVal8,tVal9}     ;// pDst[0 to 15] = tVal 6 to 9 (pDst itself is not written back)
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #2                            ;// y -= 2 (two rows per pass); sets Z for the BNE below
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst, pDst, dstStepx2               ;// pDst advanced by 2 * dstStep
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        STM      pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// pDst2[0 to 15] = tVal 6 to 9
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst2, pDst2, dstStepx2             ;// pDst2 advanced by 2 * dstStep
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LOOP_DC                             ;// Loop until y == 0, i.e. BLK_SIZE/2 passes (8 if BLK_SIZE is 16)
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Plane prediction set-up. Computes, from the above row, the left column and
;// the above-left pixel:
;//     a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
;//     H = sum over k=1..8 of k * (pSrcAbove[7+k] - pSrcAbove[7-k])   (pSrcAbove[-1] is pSrcAboveLeft[0])
;//     V = the same sum taken down the left column
;//     b = (5*H + 32) >> 6,   c = (5*V + 32) >> 6
;//     d = a + 16 - 7*b - 7*c      (un-shifted value of pixel (0,0))
;// and leaves the first-row values d + x*b packed as signed halfword pairs in
;// tVal0,1,2,4,6,7,8,9 with {c, c} in tVal14, ready for LOOP_PLANE.
;// Also reduces dstStep by 16 and loads the 0x00FF00FF mask/counter register.
3360c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_PLANE
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=3
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal14, leftStep, leftStep, LSL #4  ;// tVal14 = 15*leftStep
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=2
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal10, [pSrcLeft,  tVal14]         ;// tVal10 = pSrcLeft[15*leftStep]
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal11, [pSrcAboveLeft]             ;// tVal11 = pSrcAboveLeft[0]
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB     tVal12, [pSrcAbove, #15]            ;// tVal12 = pSrcAbove[15]
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2,  tVal12,  tVal10             ;// tVal2  = pSrcAbove[15] + pSrcLeft[15*leftStep]
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal10, tVal10,  tVal11             ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal11, tVal12,  tVal11             ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal2,  tVal2,   LSL #4             ;// tVal2  = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Horizontal gradient H, accumulated in tVal11 (tVal6/7/8 are scratch)
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     tVal11, tVal11, LSL #3              ;// tVal11 = 8*(pSrcAbove[15] - pSrcAboveLeft[0])
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #0]
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #14]
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     tVal8, tVal8, tVal8, LSL #3         ;// 7*(pSrcAbove[14] - pSrcAbove[0])
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #1]
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #13]
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8                 ;// 2*(pSrcAbove[13] - pSrcAbove[1])
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8, LSL #1         ;// 6*(pSrcAbove[13] - pSrcAbove[1])
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #2]
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #12]
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8, LSL #2         ;// 5*(pSrcAbove[12] - pSrcAbove[2])
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #3]
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #11]
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8, LSL #2       ;// + 4*(pSrcAbove[11] - pSrcAbove[3])
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #4]
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #10]
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal8, tVal8, tVal8, LSL #1         ;// 3*(pSrcAbove[10] - pSrcAbove[4])
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #5]
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #9]
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal11, tVal11, tVal8, LSL #1       ;// + 2*(pSrcAbove[9] - pSrcAbove[5])
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal6, [pSrcAbove, #6]
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    tVal7, [pSrcAbove, #8]
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     tVal8, tVal7, tVal6                 ;// 1*(pSrcAbove[8] - pSrcAbove[6])
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     tVal7, tVal11, tVal8                ;// tVal7  = H
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Vertical gradient V, accumulated in tVal6. tVal1 walks down the left
        ;// column from row 0 while tVal9 walks up from row 14.
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal2,  tVal2,   #16                ;// tVal2  = a + 16
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      tVal1,  pSrcLeft                    ;// tVal1  = pSrcLeft
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal9,  tVal14,   leftStep          ;// tVal9  = 14*leftStep
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal9,  pSrcLeft, tVal9             ;// tVal9  = pSrcLeft + 14*leftStep
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[14*leftStep]
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal11, [tVal1], +leftStep          ;// tVal11 = pSrcLeft[0]
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,  tVal7,  LSL #2      ;// tVal7  = 5 * H
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,  #32                 ;// tVal7  = 5 * H + 32
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal8,  tVal8,  tVal11              ;// tVal8  = pSrcLeft[14*leftStep] - pSrcLeft[0]
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR      tVal12, tVal7,  #6                  ;// tVal12 = b = (5 * H + 32) >> 6
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal8,  tVal8,  tVal8,  LSL #3      ;// tVal8  = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal8,  tVal10, LSL #3      ;// tVal6  = V = 8*V0 + V1
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[13*leftStep]
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[leftStep]
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal7,  tVal12,  tVal12,  LSL #3    ;// tVal7  = 7*b
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal2,  tVal2,   tVal7              ;// tVal2  = a + 16 - 7*b
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[12*leftStep]
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7              ;// tVal7  = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[2*leftStep]
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7,  LSL #1     ;// tVal7  = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V += 6 * (row 13 - row 1)
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[11*leftStep]
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[3*leftStep]
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7,  LSL #2     ;// tVal7  = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V += 5 * (row 12 - row 2)
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[10*leftStep]
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[4*leftStep]
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7,  LSL #2     ;// tVal6  = V += 4 * (row 11 - row 3)
4190c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      dstStep, dstStep, #16               ;// dstStep -= 16: LOOP_PLANE advances pDst by 16 per row via post-indexed stores
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[9*leftStep]
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[5*leftStep]
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal7,  tVal7,   tVal7,  LSL #1     ;// tVal7  = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V += 3 * (row 10 - row 4)
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[8*leftStep]
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[6*leftStep]
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7,  LSL #1     ;// tVal6  = V += 2 * (row 9 - row 5)
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V += 1 * (row 8 - row 6)
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   tVal6,  LSL #2     ;// tVal6  = 5*V
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal6,  tVal6,   #32                ;// tVal6  = 5*V + 32
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ASR      tVal14, tVal6,   #6                 ;// tVal14 = c = (5*V + 32)>>6
4400c1bc742181ded4930842b46e9507372f0b1b963James Dong
4410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=1
4420c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB      tVal6,  tVal14,  tVal14, LSL #3     ;// tVal6  = 7*c
4430c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal14, tVal14                      ;// tVal14 = Cleared the upper half word
4440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal12,  tVal12             ;// tVal10 = 2*b
4450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal14, tVal14,  tVal14, LSL #16    ;// tVal14 = {c  ,  c}
4460c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB      tVal6,  tVal2,   tVal6              ;// tVal6  = d = a - 7*b - 7*c + 16
4470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal1,  tVal6,   tVal10             ;// tVal1  = pp2 = d + 2*b
4480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      tVal10, tVal10,  tVal12             ;// tVal10 =3*b
        ;// FIX: d is a signed 32-bit value. Packing it with ORR let the sign bits of
        ;// a negative d fill bits 31:16 and overwrite p2 (and hence every lane
        ;// derived from it). PKHBT takes only d[15:0] for the bottom halfword.
4490c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT    tVal0,  tVal6,   tVal1,  LSL #16    ;// tVal0  = p2p0   = pack {p2, p0}
4500c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal12, tVal12                      ;// tVal12 = Cleared the upper half word
4510c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTH     tVal10, tVal10                      ;// tVal10 = Cleared the upper half word
4520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal12, tVal12,  tVal12, LSL #16    ;// tVal12 = {b  ,  b}
4530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      tVal10, tVal10,  tVal10, LSL #16    ;// tVal10 = {3b , 3b}
4540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal1,  tVal0,   tVal12             ;// tVal1  = p3p1   = p2p0   + {b,b}
4550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal2,  tVal1,   tVal10             ;// tVal2  = p6p4   = p3p1   + {3b,3b}
4560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal4,  tVal2,   tVal12             ;// tVal4  = p7p5   = p6p4   + {b,b}
4570c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal6,  tVal4,   tVal10             ;// tVal6  = p10p8  = p7p5   + {3b,3b}
4580c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal7,  tVal6,   tVal12             ;// tVal7  = p11p9  = p10p8  + {b,b}
4590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal8,  tVal7,   tVal10             ;// tVal8  = p14p12 = p11p9  + {3b,3b}
4600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   tVal9,  tVal8,   tVal12             ;// tVal9  = p15p13 = p14p12 + {b,b}
4610c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      r0x00FF00FF,     =MASK_CONST        ;// r0x00FF00FF = 0x00FF00FF
4620c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Plane prediction store loop. Each pass clips and writes one 16-pixel row
;// (four word stores of four pixels each), then adds {c, c} to every packed
;// accumulator so it holds the next row's un-shifted values.
;// Per group of four pixels: USAT16 #13 clamps each halfword to 0..8191, the
;// ASR #5 / AND 0x00FF00FF pair turns that into one 0..255 byte per halfword
;// lane, and the ORR with LSL #8 interleaves the odd pixels between the even ones.
;// The row counter lives in bits 31:28 of the mask register; it never reaches
;// the pixels because bits 31:24 of a clamped value shifted right by 5 are zero.
;// NOTE(review): p2p0..p15p13, c, temp1 and temp2 are register names declared
;// outside this excerpt; presumably the p* names alias the tVal registers packed
;// just before the loop and c aliases tVal14 - confirm against the declarations.
4630c1bc742181ded4930842b46e9507372f0b1b963James DongLOOP_PLANE
4640c1bc742181ded4930842b46e9507372f0b1b963James Dong
4650c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p3p1                     ;// clamp {p3, p1} to 0..8191 per halfword
4660c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p2p0                     ;// clamp {p2, p0} to 0..8191 per halfword
4670c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p3p1,   p3p1,   c                    ;// step {p3, p1} to the next row
4680c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p2p0,   p2p0,   c                    ;// step {p2, p0} to the next row
4690c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5    ;// temp2 = pixels 3 and 1 in bytes 2 and 0
4700c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5    ;// temp1 = pixels 2 and 0 in bytes 2 and 0
4710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8          ;// temp1 = pixels 3,2,1,0 (pixel 0 in the low byte)
4720c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                    ;// store pixels 0..3, pDst += 4
4730c1bc742181ded4930842b46e9507372f0b1b963James Dong
4740c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p7p5                     ;// same sequence for pixels 4..7
4750c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p6p4
4760c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p7p5,   p7p5,   c
4770c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p6p4,   p6p4,   c
4780c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5
4790c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5
4800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8
4810c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                    ;// store pixels 4..7, pDst += 4
4820c1bc742181ded4930842b46e9507372f0b1b963James Dong
4830c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p11p9                    ;// same sequence for pixels 8..11
4840c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p10p8
4850c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p11p9,  p11p9,  c
4860c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p10p8,  p10p8,  c
4870c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5
4880c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5
4890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8
4900c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                    ;// store pixels 8..11, pDst += 4
4910c1bc742181ded4930842b46e9507372f0b1b963James Dong
4920c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp2, #13, p15p13                   ;// same sequence for pixels 12..15
4930c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16   temp1, #13, p14p12
4940c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p15p13, p15p13, c
4950c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD16   p14p12, p14p12, c
4960c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp2, r0x00FF00FF, temp2, ASR #5
4970c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND      temp1, r0x00FF00FF, temp1, ASR #5
4980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR      temp1, temp1, temp2, LSL #8
4990c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR      temp1, [pDst], #4                    ;// store pixels 12..15, pDst += 4
5000c1bc742181ded4930842b46e9507372f0b1b963James Dong
5010c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDS     r0x00FF00FF, r0x00FF00FF, #1<<28     ;// Loop counter value in top 4 bits; carry is set when it wraps on the 16th add
5020c1bc742181ded4930842b46e9507372f0b1b963James Dong
5030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pDst, pDst, dstStep                  ;// next row (dstStep was already reduced by 16); flags from ADDS are left intact
5040c1bc742181ded4930842b46e9507372f0b1b963James Dong
5050c1bc742181ded4930842b46e9507372f0b1b963James Dong        BCC      LOOP_PLANE                           ;// Loop for 16 times
5060c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
5070c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
5080c1bc742181ded4930842b46e9507372f0b1b963James Dong
5090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// ARM1136JS
5100c1bc742181ded4930842b46e9507372f0b1b963James Dong
5110c1bc742181ded4930842b46e9507372f0b1b963James Dong
5120c1bc742181ded4930842b46e9507372f0b1b963James Dong        END
5130c1bc742181ded4930842b46e9507372f0b1b963James Dong;-----------------------------------------------------------------------------------------------
5140c1bc742181ded4930842b46e9507372f0b1b963James Dong; omxVCM4P10_PredictIntra_16x16 ends
5150c1bc742181ded4930842b46e9507372f0b1b963James Dong;-----------------------------------------------------------------------------------------------
516