10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
330c1bc742181ded4930842b46e9507372f0b1b963James Dong
340c1bc742181ded4930842b46e9507372f0b1b963James Dong
350c1bc742181ded4930842b46e9507372f0b1b963James Dong
360c1bc742181ded4930842b46e9507372f0b1b963James Dong
370c1bc742181ded4930842b46e9507372f0b1b963James Dong
380c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
390c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Local storage declared through the M_ALLOC* macros of armCOMM_s.h
        ;// (presumably stack-frame slots; confirm against the macro definitions):
        ;//   ppDstArgs - caller's pDst/dstStep pair, parked while r2/r3 address
        ;//               the intermediate buffer during the vertical pass
        ;//   ppSrc     - pSrc at the top of the current 4-pixel column group
        ;//   ppDst     - pDst at the top of the current 4-pixel column group
        ;//   pCounter  - row counter of the horizontal pass (r8 doubles as ValCA there)
400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 ppDstArgs, 8
410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppSrc, 4
420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppDst, 4
430c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pCounter, 4
440c1bc742181ded4930842b46e9507372f0b1b963James Dong
450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function:
470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//     armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// be aligned.
        ;//
        ;// The work is done in two passes:
        ;//   1. Vertical 6-tap filter (1,-5,20,20,-5,1) over 12 source columns. The
        ;//      code reads rows pSrc - 2*srcStep .. pSrc + 6*srcStep, 12 bytes each,
        ;//      and writes 4 rows of 16-bit sums (each biased by 16*255) to the
        ;//      intermediate buffer with a row stride of 24 bytes (96 bytes in total).
        ;//   2. Horizontal 6-tap filter over those sums, followed by rounding,
        ;//      clamping to 0..255 and packing of 4 result bytes per output row.
510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers used as input for this function
530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0,r1,r2,r3, r8 where r0 = pSrc, r2 = pDst, r1 = srcStep, r3 = dstStep, r8 = intermediate-buf pointer
540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers preserved for top level function
560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0,r1,r2,r3,r4,r5,r6,r14
        ;// NOTE(review): r4-r6 and r14 are written by the body, so their preservation
        ;// presumably relies on the save/restore generated by M_START ..., r6 / M_END
        ;// -- confirm against armCOMM_s.h.
570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers modified by the function
590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r7,r8,r9,r10,r11,r12
600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output registers
620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// None. Function will preserve r0-r3
        ;// NOTE(review): at exit r2/r3 hold the caller's pDst/dstStep again, but r0 is
        ;// rewound to the start of the intermediate buffer and r1 holds 24 (the
        ;// intermediate row stride), not the caller's pSrc/srcStep -- confirm what the
        ;// callers rely on.
630c1bc742181ded4930842b46e9507372f0b1b963James Dong
640c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6
650c1bc742181ded4930842b46e9507372f0b1b963James Dong
660c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
670c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0
680c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1
690c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2
700c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3
710c1bc742181ded4930842b46e9507372f0b1b963James Dong
720c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare inner loop registers for the vertical pass (WidthLoop/HeightLoop)
;// Many names below alias the same physical register; each name marks one stage
;// in the life of that register (e.g. r5 is ValC, then ValC1, ValCF1, ValA, ValA1,
;// ValAF1 and finally Acc1). Suffix 0 = even-pixel halfword pair [x2 x0],
;// suffix 1 = odd-pixel halfword pair [x3 x1]; two letters = sum of two rows.
730c1bc742181ded4930842b46e9507372f0b1b963James DongValA            RN 5
740c1bc742181ded4930842b46e9507372f0b1b963James DongValA0           RN 4
750c1bc742181ded4930842b46e9507372f0b1b963James DongValA1           RN 5
760c1bc742181ded4930842b46e9507372f0b1b963James DongValAF0          RN 4
770c1bc742181ded4930842b46e9507372f0b1b963James DongValAF1          RN 5
780c1bc742181ded4930842b46e9507372f0b1b963James Dong
790c1bc742181ded4930842b46e9507372f0b1b963James DongValB            RN 11
800c1bc742181ded4930842b46e9507372f0b1b963James Dong
810c1bc742181ded4930842b46e9507372f0b1b963James DongValC            RN 5
820c1bc742181ded4930842b46e9507372f0b1b963James DongValC0           RN 4
830c1bc742181ded4930842b46e9507372f0b1b963James DongValC1           RN 5
840c1bc742181ded4930842b46e9507372f0b1b963James DongValCD0          RN 12
850c1bc742181ded4930842b46e9507372f0b1b963James DongValCD1          RN 14
860c1bc742181ded4930842b46e9507372f0b1b963James DongValCF0          RN 4
870c1bc742181ded4930842b46e9507372f0b1b963James DongValCF1          RN 5
880c1bc742181ded4930842b46e9507372f0b1b963James Dong
890c1bc742181ded4930842b46e9507372f0b1b963James DongValD            RN 10
900c1bc742181ded4930842b46e9507372f0b1b963James Dong
910c1bc742181ded4930842b46e9507372f0b1b963James DongValE            RN 7
920c1bc742181ded4930842b46e9507372f0b1b963James DongValE0           RN 6
930c1bc742181ded4930842b46e9507372f0b1b963James DongValE1           RN 7
940c1bc742181ded4930842b46e9507372f0b1b963James DongValEB0          RN 10
950c1bc742181ded4930842b46e9507372f0b1b963James DongValEB1          RN 11
960c1bc742181ded4930842b46e9507372f0b1b963James DongValED0          RN 6
970c1bc742181ded4930842b46e9507372f0b1b963James DongValED1          RN 7
980c1bc742181ded4930842b46e9507372f0b1b963James Dong
990c1bc742181ded4930842b46e9507372f0b1b963James DongValF            RN 10
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong
1010c1bc742181ded4930842b46e9507372f0b1b963James DongValG            RN 14
1020c1bc742181ded4930842b46e9507372f0b1b963James DongValG0           RN 12
1030c1bc742181ded4930842b46e9507372f0b1b963James DongValG1           RN 14
1040c1bc742181ded4930842b46e9507372f0b1b963James DongValGB0          RN 12
1050c1bc742181ded4930842b46e9507372f0b1b963James DongValGB1          RN 14
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Accumulators: used by both passes (vertical sums, then horizontal results)
1070c1bc742181ded4930842b46e9507372f0b1b963James DongAcc0            RN 4
1080c1bc742181ded4930842b46e9507372f0b1b963James DongAcc1            RN 5
1090c1bc742181ded4930842b46e9507372f0b1b963James DongAcc2            RN 6
1100c1bc742181ded4930842b46e9507372f0b1b963James DongAcc3            RN 7
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Temp only carries the width constant while Counter is built; Step is not
;// referenced by the code in this file.
1120c1bc742181ded4930842b46e9507372f0b1b963James DongTemp            RN 7
1130c1bc742181ded4930842b46e9507372f0b1b963James DongStep            RN 6
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// pInterBuf and Counter share r8: the incoming buffer pointer is copied into
;// pDst before Counter is first written.
1150c1bc742181ded4930842b46e9507372f0b1b963James DongpInterBuf       RN 8
1160c1bc742181ded4930842b46e9507372f0b1b963James DongCounter         RN 8
1170c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00ff00ff     RN 9                                        ;// [0 255 0 255] where 255 is offset
1180c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0001fc00     RN 10                                       ;// (32*16*255) - 512: total offset of one horizontal result minus the rounding term
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare inner loop registers for the horizontal pass (HeightLoop1)
;// Each Val?? register holds two 16-bit vertical sums, [top bottom]; ValCA shares
;// r8 with Counter, which is why the row counter lives in pCounter in that loop.
;// The two constants are halfword pairs: 0x00140001 = [20 1], 0x0014fffb = [20 -5].
1220c1bc742181ded4930842b46e9507372f0b1b963James DongValCA           RN 8
1230c1bc742181ded4930842b46e9507372f0b1b963James DongValDB           RN 9
1240c1bc742181ded4930842b46e9507372f0b1b963James DongValGE           RN 10
1250c1bc742181ded4930842b46e9507372f0b1b963James DongValHF           RN 11
1260c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00140001     RN 12
1270c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0014fffb     RN 14
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// The two aliases below are not referenced by the code in this file.
1290c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00000200     RN 12
1300c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x000000ff     RN 12
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;//
        ;// Vertical interpolation: 12 columns x 4 rows of 16-bit sums are written
        ;// to the intermediate buffer. The caller's pDst/dstStep are parked in
        ;// ppDstArgs so that r2/r3 can address the buffer (row stride 24 bytes =
        ;// 12 halfwords).
        ;//
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      pDst, dstStep, ppDstArgs
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pDst, pInterBuf
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         dstStep, #24
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Set up counter of format, [0]  [0]  [1 (height)]  [8 (width)]
        ;// Both loop counts are packed in one register: byte 1 counts row pairs,
        ;// byte 0 counts remaining columns. Each loop ends when its subtraction
        ;// drives the whole register negative.
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #1
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Temp, #8
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Counter, Temp, Counter, LSL #8        ;// [0 0 H W]
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results
1420c1bc742181ded4930842b46e9507372f0b1b963James DongWidthLoop
        ;// One pass per group of 4 columns: remember where this group starts in
        ;// both the source and the intermediate buffer.
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       pSrc, ppSrc
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       pDst, ppDst
1450c1bc742181ded4930842b46e9507372f0b1b963James DongHeightLoop
1460c1bc742181ded4930842b46e9507372f0b1b963James DongTwoRowsLoop
        ;// Each pass filters two output rows from seven source rows A..G, where
        ;// pSrc addresses row C on entry (A = pSrc - 2*srcStep, G = pSrc + 4*srcStep).
        ;// TwoRowsLoop is never branched to in this file.
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValC, [pSrc], srcStep                   ;// Load  [c3 c2 c1 c0]
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValD, [pSrc], srcStep                   ;// Load  [d3 d2 d1 d0]
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValE, [pSrc], srcStep                   ;// Load  [e3 e2 e1 e0]
        ;// pSrc moved 3 rows down; going 4 rows back leaves it on row B.
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #2
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValC0, r0x00ff00ff, ValC                ;// [0 c2 0 c0] + [0 255 0 255]
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValC1, r0x00ff00ff, ValC, ROR #8        ;// [0 c3 0 c1] + [0 255 0 255]
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValB, [pSrc]                            ;// Load  [b3 b2 b1 b0]
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValE0, r0x00ff00ff, ValE                ;// [0 e2 0 e0] + [0 255 0 255]
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValE1, r0x00ff00ff, ValE, ROR #8        ;// [0 e3 0 e1] + [0 255 0 255]
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCD0, ValC0, ValD                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCD1, ValC1, ValD, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValEB0, ValE0, ValB                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValCD0, ValEB0, ValCD0, LSL #2          ;// 4*(Off+C+D) - (Off+B+E)
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Row D is loaded again because its register (r10) was reused for ValEB0.
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValD, [pSrc, srcStep, LSL #1]                       ;// Load  [d3 d2 d1 d0]
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValEB1, ValE1, ValB, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValCD1, ValEB1, ValCD1, LSL #2          ;// 4*(Off+C+D) - (Off+B+E), odd pixels
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValED0, ValE0, ValD                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValED1, ValE1, ValD, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValF, [pSrc, srcStep, LSL #2]           ;// Load  [f3 f2 f1 f0]
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValB, [pSrc], srcStep                   ;// Load  [b3 b2 b1 b0]; pSrc is back on row C
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValCD0, ValCD0, ValCD0, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)]
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValCD1, ValCD1, ValCD1, LSL #2
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCF1, ValC1, ValF, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCF0, ValC0, ValF                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValED1, ValCF1, ValED1, LSL #2
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// ValA (r5) first holds the address of row A, then the row itself.
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         ValA, pSrc, srcStep, LSL #1
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValA, [ValA]                            ;// Load  [a3 a2 a1 a0]
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValED0, ValCF0, ValED0, LSL #2          ;// 4*(Off+E+D) - (Off+C+F)
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValED1, ValED1, ValED1, LSL #2
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValED0, ValED0, ValED0, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)]
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValA0, r0x00ff00ff, ValA                ;// [0 a2 0 a0] + [0 255 0 255]
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValA1, r0x00ff00ff, ValA, ROR #8        ;// [0 a3 0 a1] + [0 255 0 255]
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValAF0, ValA0, ValF                     ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValAF1, ValA1, ValF, ROR #8             ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc1, ValCD1, ValAF1
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValG, [pSrc, srcStep, LSL #2]           ;// Load  [g3 g2 g1 g0]
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc0, ValCD0, ValAF0                    ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc1, [pDst, #4]                        ;// Store odd-pixel sums [x3 x1] in the second word; pDst is not moved
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Acc0, [pDst], dstStep                   ;// Store even-pixel sums [x2 x0] & step to the next intermediate row
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValG0, r0x00ff00ff, ValG                ;// [0 g2 0 g0] + [0 255 0 255]
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValG1, r0x00ff00ff, ValG, ROR #8        ;// [0 g3 0 g1] + [0 255 0 255]
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValGB0, ValG0, ValB                     ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValGB1, ValG1, ValB, ROR #8             ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc2, ValED0, ValGB0                    ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc3, ValED1, ValGB1
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc3, [pDst, #4]                        ;// Store odd-pixel sums of the second row; pDst is not moved
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Acc2, [pDst], dstStep                   ;// Store even-pixel sums & step to the next intermediate row
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// BPL below tests the flags of this SUBS; the ADD in between leaves
        ;// them untouched.
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1 << 8               ;// Height field 1 -> 0 -> negative: two passes = 4 intermediate rows
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, srcStep, LSL #1
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        BPL         HeightLoop
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Column group done: return to its top and move 4 source bytes /
        ;// 4 halfword pairs (8 bytes) to the right.
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       pSrc, ppSrc
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       pDst, ppDst
        ;// Adding (1 << 8) - 4 cancels the height-field underflow and takes 4 off
        ;// the width field in one step; BPL below tests the flags of this ADDS.
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADDS        Counter, Counter, #(1 << 8)-4           ;// Loop till width is 12 (width field 8 -> 4 -> 0 -> negative)
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, #4
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, #8
        ;// Put the height field back to 1 for the next column group.
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Counter, Counter, #1<<8
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        BPL         WidthLoop
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Horizontal interpolation using multiplication
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// The vertical pass left pDst 24 bytes (3 column groups of 8 bytes) past
        ;// the start of the intermediate buffer; that buffer now becomes the source
        ;// with a 24-byte row stride, and the caller's pDst/dstStep are restored.
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pDst, #24
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         srcStep, #24
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      pDst, dstStep, ppDstArgs
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Counter = number of output rows. The constants are halfword pairs:
        ;// 0x0014fffb = [20 -5], 0x00140001 = [20 1].
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #4
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0014fffb, =0x0014fffb
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00140001, =0x00140001
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong
2240c1bc742181ded4930842b46e9507372f0b1b963James DongHeightLoop1
        ;// Counter shares r8 with ValCA, so it is parked in pCounter for the
        ;// duration of the row.
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Counter, pCounter
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// a..h are the first eight 16-bit vertical sums of this row, stored as
        ;// interleaved even/odd halfword pairs by the vertical pass.
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValCA, [pSrc], #4                   ;// Load  [0 c 0 a]
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValDB, [pSrc], #4                   ;// Load  [0 d 0 b]
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValGE, [pSrc], #4                   ;// Load  [0 g 0 e]
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValHF, [pSrc], #4                   ;// Load  [0 h 0 f]
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i     (i is loaded further down)
        ;// The X forms (SMUADX/SMLADX) pair the bottom half of the data register
        ;// with the top half of the constant and vice versa.
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc0, ValCA, r0x00140001            ;// Acc0  = 20*c + 1*a
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc1, ValDB, r0x00140001            ;// Acc1  = 20*d + 1*b
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX      Acc2, ValGE, r0x0014fffb            ;// Acc2  = 20*e - 5*g
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc3, ValGE, r0x0014fffb            ;// Acc3  = 20*g - 5*e
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0      ;// Acc0 += 20*d - 5*b
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc1, ValGE, r0x00140001, Acc1      ;// Acc1 += 20*e + 1*g
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc2, ValHF, r0x00140001, Acc2      ;// Acc2 += 20*f + 1*h
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3      ;// Acc3 += 20*f - 5*h
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0      ;// Acc0 += -5*e
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1      ;// Acc1 += -5*c
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc2, ValCA, r0x00140001, Acc2      ;// Acc2 += 1*c
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc3, ValDB, r0x00140001, Acc3      ;// Acc3 += 1*d
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// ValCA is free now: reload it with the ninth sum, i. The post-increment
        ;// also takes pSrc to the start of the next intermediate row.
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRH        ValCA, [pSrc], #8                   ;// 8 = srcStep - 16
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc0, ValHF, r0x00140001, Acc0      ;// Acc0 += 1*f
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1      ;// Acc1 += -5*f
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2      ;// Acc2 += -5*d
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc3, ValCA, r0x00140001, Acc3      ;// Acc3 += 1*i
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Every input carried a 16*255 offset and the taps sum to 32, so each
        ;// accumulator is 32*16*255 too large. Subtracting 512 less than that
        ;// removes the offset and leaves the +512 rounding term for the >> 10 below.
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0001fc00, =0x0001fc00            ;// (0xff * 16 * 32) - 512
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc0, Acc0, r0x0001fc00
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc1, Acc1, r0x0001fc00
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc2, Acc2, r0x0001fc00
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc3, Acc3, r0x0001fc00
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Saturate to 18 unsigned bits so that the shift by 10 yields 0..255.
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc0, #18, Acc0
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc1, #18, Acc1
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc2, #18, Acc2
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc3, #18, Acc3
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc0, Acc0, LSR #10
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc1, Acc1, LSR #10
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc2, Acc2, LSR #10
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc3, Acc3, LSR #10
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Pack the four bytes as Acc0 | Acc1<<8 | Acc2<<16 | Acc3<<24 and store
        ;// them as one word. The row counter comes back from pCounter; BGT tests
        ;// the flags of the SUBS (the ORR after it does not set flags; M_STR is
        ;// presumably flag-neutral as well -- confirm against armCOMM_s.h).
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       Counter, pCounter
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR         Acc0, Acc0, Acc1, LSL #8
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR         Acc2, Acc2, Acc3, LSL #8
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR         Acc0, Acc0, Acc2, LSL #16
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Acc0, [pDst], dstStep
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         HeightLoop1
2810c1bc742181ded4930842b46e9507372f0b1b963James DongEnd
        ;// Rewind both pointers by the four rows just processed: pDst returns to
        ;// the caller's value, pSrc to the start of the intermediate buffer
        ;// (srcStep is 24 here, not the caller's source stride).
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, dstStep, LSL #2
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #2
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong
291