10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   12290
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Wednesday, April 9, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS CortexA8
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
340c1bc742181ded4930842b46e9507372f0b1b963James Dong;// This table implements a C-style switch/case in asm by
350c1bc742181ded4930842b46e9507372f0b1b963James Dong;// the method of two levels of indexing.
360c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-------------------------------------------------------
370c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Jump table for the prediction-mode dispatch. Each DCD word holds the address
;// of one mode handler label. The function loads the table base into pTable and
;// branches with "LDR pc, [pTable, predMode, LSL #2]", so entry i is taken when
;// predMode == i; in this table's order: 0 = VERT, 1 = HOR, 2 = DC, 3 = PLANE.
;// NOTE(review): no range check on predMode is visible before the indexed load -
;// confirm that callers guarantee predMode is within 0..3.
380c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_pIndexTable16x16
390c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
400c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCD  OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE
410c1bc742181ded4930842b46e9507372f0b1b963James Dong
420c1bc742181ded4930842b46e9507372f0b1b963James Dong
430c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF CortexA8
440c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Halfword (DCW) weight table for the PLANE path. It is read eight halfwords
;// (one Q register) at a time through pMultTable with post-increment
;// ("VLD1 qMultiplier, [pMultTable]!"), so the row order below is significant:
;//   row 0 (7,6,5,4,3,2,1,8): first load; multiplied (VMULL/VMLAL) with the
;//                            above/left difference vectors to accumulate qH/qV
;//   row 1 (0..7):            second load (into qMultiplier0)
;//   row 2 (8..15):           presumably the third load; that load is not in the
;//                            visible part of the file - TODO confirm
;// The second M_TABLE argument (1) is interpreted by the macro in armCOMM_s.h,
;// which is not visible here.
450c1bc742181ded4930842b46e9507372f0b1b963James Dong    M_TABLE armVCM4P10_MultiplierTable16x16,1
460c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCW   7,  6,  5,  4,  3,  2,  1,  8
470c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCW   0,  1,  2,  3,  4,  5,  6,  7
480c1bc742181ded4930842b46e9507372f0b1b963James Dong    DCW   8,  9, 10, 11, 12, 13, 14, 15
490c1bc742181ded4930842b46e9507372f0b1b963James Dong
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
510c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Constants
520c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
;// BLK_SIZE (0x10 = 16) is the value the function moves into the row counter y
;// before dispatching to a mode handler.
;// NOTE(review): MUL_CONST0..MUL_CONST3 and MASK_CONST are not referenced in the
;// visible part of this file - confirm whether they are still needed.
530c1bc742181ded4930842b46e9507372f0b1b963James DongBLK_SIZE        EQU 0x10
540c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST0      EQU 0x01010101
550c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST1      EQU 0x00060004
560c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST2      EQU 0x00070005
570c1bc742181ded4930842b46e9507372f0b1b963James DongMUL_CONST3      EQU 0x00030001
580c1bc742181ded4930842b46e9507372f0b1b963James DongMASK_CONST      EQU 0x00FF00FF
590c1bc742181ded4930842b46e9507372f0b1b963James Dong
600c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Scratch variable
620c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
;// Core-register aliases for temporaries. Several of them share a physical
;// register with another alias, so they must not be live at the same time:
;//   y          (r12) - row counter, initialised to BLK_SIZE before the dispatch
;//   pc         (r15) - program counter; target of the jump-table load
;//   return     (r0)  - status value written just before M_EXIT; same register
;//                      as pSrcLeft, so pSrcLeft is dead once return is set
;//   pTable     (r9)  - base of the mode jump table (used only for the dispatch)
;//   pMultTable (r9)  - base of the multiplier table (PLANE path); shares r9
;//                      with pTable
;//   count      (r11) - number of available neighbours (DC path); shares r11
;//                      with pTmp2, which the HOR path uses
630c1bc742181ded4930842b46e9507372f0b1b963James Dongy               RN 12
640c1bc742181ded4930842b46e9507372f0b1b963James Dongpc              RN 15
650c1bc742181ded4930842b46e9507372f0b1b963James Dong
660c1bc742181ded4930842b46e9507372f0b1b963James Dongreturn          RN 0
670c1bc742181ded4930842b46e9507372f0b1b963James DongpTable          RN 9
680c1bc742181ded4930842b46e9507372f0b1b963James Dongcount           RN 11
690c1bc742181ded4930842b46e9507372f0b1b963James DongpMultTable      RN 9
700c1bc742181ded4930842b46e9507372f0b1b963James Dong; ----------------------------------------------
710c1bc742181ded4930842b46e9507372f0b1b963James Dong; Neon registers
720c1bc742181ded4930842b46e9507372f0b1b963James Dong; ----------------------------------------------
;// NEON aliases used by the VERT, HOR and DC paths. Several names are views of
;// the same physical register with a different element type:
;//   Q0 (= D0:D1): qAbove (U8 bytes loaded from pSrcAbove), qSum8 (U16 pairwise
;//                 sums); dSum80/dSum81 are its two halves, and dSum4/dSum2/
;//                 dSum1 are D0 viewed as U16/U32/U64 during the reduction
;//   Q1:           qLeft (U8 bytes loaded from pSrcLeft)
;//   Q3 (= D6:D7): qOut (U8 output row); its halves double as dSumLeft (D6) and
;//                 dSumAbove (D7), and the DC path writes qOut only after both
;//                 sums have been combined into dSum
;//   D8:           dSum (U64 rounded DC value); dSum0 is its byte lane 0, which
;//                 VDUP broadcasts into qOut
730c1bc742181ded4930842b46e9507372f0b1b963James DongqAbove          QN Q0.U8
740c1bc742181ded4930842b46e9507372f0b1b963James DongqLeft           QN Q1.U8
750c1bc742181ded4930842b46e9507372f0b1b963James DongqSum8           QN Q0.U16
760c1bc742181ded4930842b46e9507372f0b1b963James DongdSum80          DN D0.U16
770c1bc742181ded4930842b46e9507372f0b1b963James DongdSum81          DN D1.U16
780c1bc742181ded4930842b46e9507372f0b1b963James DongdSum4           DN D0.U16
790c1bc742181ded4930842b46e9507372f0b1b963James DongdSum2           DN D0.U32
800c1bc742181ded4930842b46e9507372f0b1b963James DongdSum1           DN D0.U64
810c1bc742181ded4930842b46e9507372f0b1b963James DongqOut            QN Q3.U8
820c1bc742181ded4930842b46e9507372f0b1b963James DongdSumLeft        DN D6.U64
830c1bc742181ded4930842b46e9507372f0b1b963James DongdSumAbove       DN D7.U64
840c1bc742181ded4930842b46e9507372f0b1b963James DongdSum            DN D8.U64
850c1bc742181ded4930842b46e9507372f0b1b963James DongdSum0           DN D8.U8[0]
860c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// NEON aliases that, in the visible code, are referenced by the PLANE path -
;// except qTmp, which the HOR loop uses. Some names declared here are not
;// referenced in the visible part of the file.
;// Many aliases share a physical register (Qn = D2n:D2n+1), so writing through
;// one name overwrites the others:
;//   Q0  (D0:D1)  : qSum0, qTmp; dAbove0/dAbove1; dBPlusCMult7(S16) is D1
;//   Q1  (D2:D3)  : qSum1; dLeft0/dLeft1
;//   Q2  (D4:D5)  : dAboveLeft (D4), dRevAbove/dRevAbove64 (D5)
;//   Q3  (D6:D7)  : qAbove15minus0; dAbove15minus0 (D6); dOut0/dOut1
;//   Q4  (D8:D9)  : qDiffAbove, qAboveDiff; dDiffAbove0/dDiffAbove1;
;//                  dAboveDiff1/dAboveDiff64 (D9)
;//   Q5           : qB0
;//   Q6  (D12:D13): qB, qB1; dRevLeft/dRevLeft64 (D12)
;//   Q7  (D14:D15): qC, qLeft15minus0; dLeft15minus0 (D14)
;//   Q8  (D16:D17): qLeftDiff, qDiffLeft; dDiffLeft0/dDiffLeft1;
;//                  dLeftDiff1/dLeftDiff64 (D17)
;//   Q10 (D20:D21): qMultiplier, qMultiplier0; dMultiplier0/dMultiplier1
;//   Q11 (D22:D23): qH, qA, qHV, qHV0; dH0/dH1; dHV00/dHV01; dA1 (D23);
;//                  dHV0/dHV1 are S16 lane 0 of D22/D23
;//   Q12 (D24:D25): qV, qHV1, qMultiplier1; dV0/dV1; dHV10/dHV11
;//   Q13          : qConst
;// NOTE(review): D8-D15 are written by this code; M_START is passed d15,
;// presumably so the macro saves/restores them - confirm in armCOMM_s.h.
870c1bc742181ded4930842b46e9507372f0b1b963James DongqH              QN Q11.S32
880c1bc742181ded4930842b46e9507372f0b1b963James DongqV              QN Q12.S32
890c1bc742181ded4930842b46e9507372f0b1b963James DongqA              QN Q11.S16
900c1bc742181ded4930842b46e9507372f0b1b963James DongqB              QN Q6.S16
910c1bc742181ded4930842b46e9507372f0b1b963James DongqC              QN Q7.S16
920c1bc742181ded4930842b46e9507372f0b1b963James Dong
930c1bc742181ded4930842b46e9507372f0b1b963James DongqB0             QN Q5.S16
940c1bc742181ded4930842b46e9507372f0b1b963James DongqB1             QN Q6.S16
950c1bc742181ded4930842b46e9507372f0b1b963James DongdA1             DN D23.S16
960c1bc742181ded4930842b46e9507372f0b1b963James Dong
970c1bc742181ded4930842b46e9507372f0b1b963James DongdH0             DN D22.S32
980c1bc742181ded4930842b46e9507372f0b1b963James DongdH1             DN D23.S32
990c1bc742181ded4930842b46e9507372f0b1b963James DongdV0             DN D24.S32
1000c1bc742181ded4930842b46e9507372f0b1b963James DongdV1             DN D25.S32
1010c1bc742181ded4930842b46e9507372f0b1b963James Dong
1020c1bc742181ded4930842b46e9507372f0b1b963James DongqHV             QN Q11.S64
1030c1bc742181ded4930842b46e9507372f0b1b963James DongqHV0            QN Q11.S32
1040c1bc742181ded4930842b46e9507372f0b1b963James DongqHV1            QN Q12.S64
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong
1060c1bc742181ded4930842b46e9507372f0b1b963James DongdHV00           DN D22.S32
1070c1bc742181ded4930842b46e9507372f0b1b963James DongdHV01           DN D23.S32
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong
1090c1bc742181ded4930842b46e9507372f0b1b963James DongdHV0            DN D22.S16[0]
1100c1bc742181ded4930842b46e9507372f0b1b963James DongdHV1            DN D23.S16[0]
1110c1bc742181ded4930842b46e9507372f0b1b963James DongdHV10           DN D24.S64
1120c1bc742181ded4930842b46e9507372f0b1b963James DongdHV11           DN D25.S64
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong
1140c1bc742181ded4930842b46e9507372f0b1b963James DongqSum0           QN Q0.S16
1150c1bc742181ded4930842b46e9507372f0b1b963James DongqSum1           QN Q1.S16
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong
1170c1bc742181ded4930842b46e9507372f0b1b963James DongdOut0           DN D6.U8
1180c1bc742181ded4930842b46e9507372f0b1b963James DongdOut1           DN D7.U8
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong
1200c1bc742181ded4930842b46e9507372f0b1b963James DongdLeft0          DN D2.U8
1210c1bc742181ded4930842b46e9507372f0b1b963James DongdLeft1          DN D3.U8
1220c1bc742181ded4930842b46e9507372f0b1b963James DongqConst          QN Q13.S16
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong
1240c1bc742181ded4930842b46e9507372f0b1b963James DongdAbove0         DN D0.U8
1250c1bc742181ded4930842b46e9507372f0b1b963James DongdAbove1         DN D1.U8
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong
1270c1bc742181ded4930842b46e9507372f0b1b963James DongdRevLeft64      DN D12.U64
1280c1bc742181ded4930842b46e9507372f0b1b963James DongdRevLeft        DN D12.U8
1290c1bc742181ded4930842b46e9507372f0b1b963James DongdRevAbove64     DN D5.U64
1300c1bc742181ded4930842b46e9507372f0b1b963James DongdRevAbove       DN D5.U8
1310c1bc742181ded4930842b46e9507372f0b1b963James DongqLeftDiff       QN Q8.S16
1320c1bc742181ded4930842b46e9507372f0b1b963James DongdLeftDiff1      DN D17.S16
1330c1bc742181ded4930842b46e9507372f0b1b963James DongdLeftDiff64     DN D17.S64
1340c1bc742181ded4930842b46e9507372f0b1b963James DongqDiffLeft       QN Q8.S16
1350c1bc742181ded4930842b46e9507372f0b1b963James DongqDiffAbove      QN Q4.S16
1360c1bc742181ded4930842b46e9507372f0b1b963James DongdAboveDiff1     DN D9.S16
1370c1bc742181ded4930842b46e9507372f0b1b963James DongdAboveDiff64    DN D9.S64
1380c1bc742181ded4930842b46e9507372f0b1b963James DongqAboveDiff      QN Q4.S16
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong
1400c1bc742181ded4930842b46e9507372f0b1b963James DongdAboveLeft      DN D4.U8
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong
1420c1bc742181ded4930842b46e9507372f0b1b963James DongdDiffLeft0      DN D16.S16
1430c1bc742181ded4930842b46e9507372f0b1b963James DongdDiffLeft1      DN D17.S16
1440c1bc742181ded4930842b46e9507372f0b1b963James DongdDiffAbove0     DN D8.S16
1450c1bc742181ded4930842b46e9507372f0b1b963James DongdDiffAbove1     DN D9.S16
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong
1470c1bc742181ded4930842b46e9507372f0b1b963James DongqLeft15minus0   QN Q7.S16
1480c1bc742181ded4930842b46e9507372f0b1b963James DongdLeft15minus0   DN D14.S16
1490c1bc742181ded4930842b46e9507372f0b1b963James DongqAbove15minus0  QN Q3.S16
1500c1bc742181ded4930842b46e9507372f0b1b963James DongdAbove15minus0  DN D6.S16
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong
1520c1bc742181ded4930842b46e9507372f0b1b963James DongqMultiplier     QN Q10.S16
1530c1bc742181ded4930842b46e9507372f0b1b963James DongqMultiplier0    QN Q10.S16
1540c1bc742181ded4930842b46e9507372f0b1b963James DongqMultiplier1    QN Q12.S16
1550c1bc742181ded4930842b46e9507372f0b1b963James DongdMultiplier0    DN D20.S16
1560c1bc742181ded4930842b46e9507372f0b1b963James DongdMultiplier1    DN D21.S16
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong
1580c1bc742181ded4930842b46e9507372f0b1b963James DongdBPlusCMult7    DN D1.S64
1590c1bc742181ded4930842b46e9507372f0b1b963James DongdBPlusCMult7S16 DN D1.S16
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong
1610c1bc742181ded4930842b46e9507372f0b1b963James DongqTmp            QN Q0.U8
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong;//--------------------------------------------
;// r0-r3 are used directly as the first four arguments. leftStep, dstStep,
;// predMode and availability are stack arguments (declared with M_ARG) that the
;// function loads into r4-r7 with M_LDR before dispatching on predMode.
;// pTmp and pTmp2 hold a second row pointer (base + stride) and step holds a
;// doubled stride, so that rows are read/written as two interleaved streams.
;// pTmp2 shares r11 with the scratch alias count.
1660c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcLeft        RN 0    ;// input pointer
1670c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAbove       RN 1    ;// input pointer
1680c1bc742181ded4930842b46e9507372f0b1b963James DongpSrcAboveLeft   RN 2    ;// input pointer
1690c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 3    ;// output pointer
1700c1bc742181ded4930842b46e9507372f0b1b963James DongleftStep        RN 4    ;// input variable
1710c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 5    ;// input variable
1720c1bc742181ded4930842b46e9507372f0b1b963James DongpredMode        RN 6    ;// input variable
1730c1bc742181ded4930842b46e9507372f0b1b963James Dongavailability    RN 7    ;// input variable
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong
1750c1bc742181ded4930842b46e9507372f0b1b963James DongpTmp            RN 8
1760c1bc742181ded4930842b46e9507372f0b1b963James Dongstep            RN 10
1770c1bc742181ded4930842b46e9507372f0b1b963James DongpTmp2           RN 11
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// omxVCM4P10_PredictIntra_16x16 starts
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong;//-----------------------------------------------------------------------------------------------
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Write function header
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START omxVCM4P10_PredictIntra_16x16, r11, d15
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Define stack arguments
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    LeftStep,     4
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    DstStep,      4
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    PredMode,     4
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG    Availability, 4
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// M_STALL ARM1136JS=4
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Load argument from the stack
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      y, #BLK_SIZE                        ;// Outer Loop Count
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on preMode
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong
2050c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_VERT
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    qAbove,  [pSrcAbove]
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pTmp, pDst, dstStep
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     step, dstStep, dstStep
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst], step
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp], step
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pDst]
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qAbove, [pTmp]
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     return, #OMX_Sts_NoErr               ;// returnNoError
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong
2280c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_HOR
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pTmp, pSrcLeft, leftStep
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     leftStep, leftStep, leftStep
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pTmp2, pDst, dstStep
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     dstStep, dstStep, dstStep
2330c1bc742181ded4930842b46e9507372f0b1b963James DongLoopHor
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qLeft[]}, [pSrcLeft], leftStep
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qTmp[]}, [pTmp], leftStep
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #8
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qLeft, [pDst], dstStep
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qTmp, [pTmp2], dstStep
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qLeft[]}, [pSrcLeft], leftStep
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qTmp[]}, [pTmp], leftStep
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qLeft, [pDst], dstStep
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qTmp, [pTmp2], dstStep
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qLeft[]}, [pSrcLeft], leftStep
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qTmp[]}, [pTmp], leftStep
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qLeft, [pDst], dstStep
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qTmp, [pTmp2], dstStep
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qLeft[]}, [pSrcLeft], leftStep
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     {qTmp[]}, [pTmp], leftStep
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qLeft, [pDst], dstStep
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qTmp, [pTmp2], dstStep
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LoopHor                                  ;// Loop for 16 times
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong
2560c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_DC
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      count, #0                                 ;// count = 0
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability, #OMX_VC_LEFT
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      UpperOrNoneAvailable                      ;// Jump to Upper if not left
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pTmp, pSrcLeft, leftStep
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     step, leftStep, leftStep
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[0]}, [pSrcLeft],step
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[1]}, [pTmp],step
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[2]}, [pSrcLeft],step
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[3]}, [pTmp],step
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[4]}, [pSrcLeft],step
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[5]}, [pTmp],step
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[6]}, [pSrcLeft],step
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[7]}, [pTmp],step
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[8]}, [pSrcLeft],step
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[9]}, [pTmp],step
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[10]},[pSrcLeft],step
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[11]},[pTmp],step
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[12]},[pSrcLeft],step
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[13]},[pTmp],step
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[14]},[pSrcLeft],step
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[15]},[pTmp]
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL   qSum8, qLeft
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     count, count, #1
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADD    dSum4, dSum80, dSum81
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL   dSum2, dSum4
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL   dSumLeft, dSum2
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR    dSum, dSumLeft, #4
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong
2880c1bc742181ded4930842b46e9507372f0b1b963James DongUpperOrNoneAvailable
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        TST      availability,  #OMX_VC_UPPER              ;// if(availability & #OMX_VC_UPPER)
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ      BothOrNoneAvailable                       ;// Jump to Left if not upper
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1     qAbove, [pSrcAbove]
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      count, count, #1                          ;// if upper inc count by 1
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL   qSum8, qAbove
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADD    dSum4, dSum80, dSum81
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL   dSum2, dSum4
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL   dSumAbove, dSum2
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR    dSum, dSumAbove, #4
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong
2990c1bc742181ded4930842b46e9507372f0b1b963James DongBothOrNoneAvailable
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      count, #2                                  ;// check if both available
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      NoneAvailable
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD     dSum, dSumAbove, dSumLeft
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR    dSum, dSum, #5
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong
3060c1bc742181ded4930842b46e9507372f0b1b963James DongNoneAvailable
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP     qOut, dSum0
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        CMP      count, #0                                  ;// check if none available
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      pTmp, pDst, dstStep
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD      step, dstStep, dstStep
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LoopDC
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMOV     qOut, #128
3130c1bc742181ded4930842b46e9507372f0b1b963James DongLoopDC
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pDst], step
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1    qOut, [pTmp], step
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     return, #OMX_Sts_NoErr
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_EXIT
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong
3330c1bc742181ded4930842b46e9507372f0b1b963James DongOMX_VC_16X16_PLANE
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     pMultTable, =armVCM4P10_MultiplierTable16x16
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    qAbove, [pSrcAbove]                         ;// pSrcAbove[x]      :0<= x <= 7
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    dAboveLeft[0],[pSrcAboveLeft]
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     pTmp, pSrcLeft, leftStep
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     step, leftStep, leftStep
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[0]},  [pSrcLeft],step
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[1]},  [pTmp],step
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[2]},  [pSrcLeft],step
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[3]},  [pTmp],step
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[4]},  [pSrcLeft],step
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[5]},  [pTmp],step
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[6]},  [pSrcLeft],step
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[7]},  [pTmp],step
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[8]},  [pSrcLeft],step
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[9]},  [pTmp],step
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[10]}, [pSrcLeft],step
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[11]}, [pTmp],step
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[12]}, [pSrcLeft],step
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[13]}, [pTmp],step
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[14]}, [pSrcLeft],step
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    {qLeft[15]}, [pTmp]
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong        VREV64  dRevAbove, dAbove1                          ;// pSrcAbove[15:14:13:12:11:10:9:8]
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUBL   qAbove15minus0, dRevAbove, dAboveLeft       ;// qAbove7minus0[0] = pSrcAbove[15] - pSrcAboveLeft[0]
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHR    dRevAbove64, dRevAbove64, #8                ;// pSrcAbove[14:13:12:11:10:9:8:X]
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUBL   qAboveDiff, dRevAbove, dAbove0
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL    dAboveDiff64, dAboveDiff64, #16
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT    dDiffAbove1, dAboveDiff1, dAbove15minus0, #1
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        VREV64  dRevLeft,dLeft1                             ;// pSrcLeft[15:14:13:12:11:10:9:8]
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUBL   qLeft15minus0,dRevLeft, dAboveLeft          ;// qAbove7minus0[0] = pSrcLeft[7] - pSrcAboveLeft[0]
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHR    dRevLeft64, dRevLeft64, #8                  ;// pSrcLeft[14:13:12:11:10:9:8:X]
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUBL   qLeftDiff,dRevLeft, dLeft0
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Multiplier = [8|1|2|...|6|7]
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    qMultiplier, [pMultTable]!
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL    dLeftDiff64, dLeftDiff64, #16
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        VEXT    dDiffLeft1, dLeftDiff1, dLeft15minus0, #1
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL   qH,dDiffAbove0, dMultiplier0
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMULL   qV,dDiffLeft0,  dMultiplier0
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLAL   qH,dDiffAbove1, dMultiplier1
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMLAL   qV,dDiffLeft1,  dMultiplier1
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong
3800c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADD   dHV00,dH1,dH0
3810c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADD   dHV01,dV1,dV0
3820c1bc742181ded4930842b46e9507372f0b1b963James Dong        VPADDL  qHV, qHV0
3830c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL    qHV1,qHV,#2
3840c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD    qHV,qHV,qHV1
3850c1bc742181ded4930842b46e9507372f0b1b963James Dong
3860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// HV = [c = ((5*V+32)>>6) | b = ((5*H+32)>>6)]
3870c1bc742181ded4930842b46e9507372f0b1b963James Dong        VRSHR   qHV,qHV,#6
3880c1bc742181ded4930842b46e9507372f0b1b963James Dong
3890c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// HV1 = [c*7|b*7]
3900c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL    qHV1,qHV,#3
3910c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB    qHV1,qHV1,qHV
3920c1bc742181ded4930842b46e9507372f0b1b963James Dong
3930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Multiplier0 = [0|1|2|...|7]
3940c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    qMultiplier0, [pMultTable]!
3950c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    qB, dHV0
3960c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    qC, dHV1
3970c1bc742181ded4930842b46e9507372f0b1b963James Dong
3980c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADDL   qA,dAbove1,dLeft1
3990c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSHL    qA,qA, #4
4000c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    qA,dA1[3]
4010c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD    dBPlusCMult7, dHV10, dHV11
4020c1bc742181ded4930842b46e9507372f0b1b963James Dong
4030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Multiplier1 = [8|9|10|...|15]
4040c1bc742181ded4930842b46e9507372f0b1b963James Dong        VLD1    qMultiplier1, [pMultTable]
4050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Const = a - 7*(b+c)
4060c1bc742181ded4930842b46e9507372f0b1b963James Dong        VDUP    qConst, dBPlusCMult7S16[0]
4070c1bc742181ded4930842b46e9507372f0b1b963James Dong        VSUB    qConst, qA, qConst
4080c1bc742181ded4930842b46e9507372f0b1b963James Dong
4090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// B0 = [0*b|1*b|2*b|3*b|......|7*b]
4100c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMUL    qB0,qB,qMultiplier0
4110c1bc742181ded4930842b46e9507372f0b1b963James Dong
4120c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// B1 = [8*b|9*b|10*b|11*b|....|15*b]
4130c1bc742181ded4930842b46e9507372f0b1b963James Dong        VMUL    qB1,qB,qMultiplier1
4140c1bc742181ded4930842b46e9507372f0b1b963James Dong
4150c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD    qSum0, qB0, qConst
4160c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD    qSum1, qB1, qConst
4170c1bc742181ded4930842b46e9507372f0b1b963James Dong
4180c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Loop runs 16 times
4190c1bc742181ded4930842b46e9507372f0b1b963James DongLoopPlane
4200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// (b*x + c*y + C)>>5
4210c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN dOut0, qSum0,#5
4220c1bc742181ded4930842b46e9507372f0b1b963James Dong        VQRSHRUN dOut1, qSum1,#5
4230c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS     y, y, #1
4240c1bc742181ded4930842b46e9507372f0b1b963James Dong        VST1     qOut,[pDst],dstStep
4250c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD     qSum0,qSum0,qC
4260c1bc742181ded4930842b46e9507372f0b1b963James Dong        VADD     qSum1,qSum1,qC
4270c1bc742181ded4930842b46e9507372f0b1b963James Dong        BNE      LoopPlane
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong
4290c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV      return, #OMX_Sts_NoErr
4300c1bc742181ded4930842b46e9507372f0b1b963James Dong
4310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
4320c1bc742181ded4930842b46e9507372f0b1b963James Dong
4330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ENDIF ;// CortexA8
4340c1bc742181ded4930842b46e9507372f0b1b963James Dong
4350c1bc742181ded4930842b46e9507372f0b1b963James Dong        END
4360c1bc742181ded4930842b46e9507372f0b1b963James Dong;-----------------------------------------------------------------------------------------------
4370c1bc742181ded4930842b46e9507372f0b1b963James Dong; omxVCM4P10_PredictIntra_16x16 ends
4380c1bc742181ded4930842b46e9507372f0b1b963James Dong;-----------------------------------------------------------------------------------------------
439