10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h                            ;// OpenMAX type definitions (contents not visible from this file)
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h                             ;// presumably supplies the M_* macros (M_START, M_END, M_ALLOC*, M_LDR, ...) used below - confirm
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe     ;// make the routine's symbol visible to other objects
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS                            ;// declares the ARM1136JS build variant that the IF below tests
330c1bc742181ded4930842b46e9507372f0b1b963James Dong
340c1bc742181ded4930842b46e9507372f0b1b963James Dong
350c1bc742181ded4930842b46e9507372f0b1b963James Dong
360c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS                                        ;// Body below is assembled only when ARM1136JS is true
370c1bc742181ded4930842b46e9507372f0b1b963James Dong
380c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Local stack slots, declared through the M_ALLOC4/M_ALLOC8 macros. The macros are
        ;// defined outside this file; presumably the suffix is the alignment and the second
        ;// argument the size in bytes - confirm against armCOMM_s.h.
390c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 ppDstArgs, 8                           ;// caller's pDst/dstStep pair, parked while stage 1 runs
400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pTempResult1, 8                        ;// Acc0/Acc1 of an even stage-1 pass
410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pTempResult2, 8                        ;// Acc2/Acc3 of an even stage-1 pass
420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppSrc, 4                               ;// not referenced in the code below
430c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppDst, 4                               ;// not referenced in the code below
440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pDstStep, 4                            ;// stage-1 dstStep (r3 is reused as Temp1)
450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pSrcStep, 4                            ;// stage-1 srcStep (r1 is reused as Temp2/ValI)
460c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pCounter, 4                            ;// stage-2 loop counter (r11 is reused as ValHF)
470c1bc742181ded4930842b46e9507372f0b1b963James Dong
480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function:
500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//     armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// be aligned.
        ;//
        ;// The routine works in two stages:
        ;//   1. A 6-tap (1,-5,20,20,-5,1) filter along each source row, two rows per pass.
        ;//      Ten rows are read; the 16-bit sums, each biased by 16*255, are written to the
        ;//      intermediate buffer at byte offsets 0..79 (five 16-byte lines).
        ;//   2. The same 6-tap filter across those sums, one output column per pass, followed
        ;//      by bias removal with rounding, clamping, and a shift down to 8 bits.
540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers used as input for this function
560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0 = source pointer, r1 = source step, r2 = destination pointer, r3 = destination step,
        ;// r8 = intermediate-buffer pointer
570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers preserved for top level function
590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0,r1,r2,r3,r4,r5,r6,r14
        ;// NOTE(review): r4-r6 and r14 are overwritten in the body, so their preservation
        ;// relies on what M_START/M_END save and restore (not visible here) - confirm.
600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers modified by the function
620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r7,r8,r9,r10,r11,r12
630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output registers
650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// None. Function will preserve r0-r3
        ;// NOTE(review): as the instructions below stand, r2 and r3 hold their entry values
        ;// again at M_END, but r0 holds the intermediate-buffer base and r1 holds 16.
        ;// Unless M_END restores r0/r1, callers must not rely on them - confirm.
660c1bc742181ded4930842b46e9507372f0b1b963James Dong
670c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6
680c1bc742181ded4930842b46e9507372f0b1b963James Dong
690c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
700c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0                                    ;// source pointer; stage 2 reuses it to walk the intermediate buffer
710c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1                                    ;// source stride; set to 16 for stage 2
720c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2                                    ;// destination pointer; points into the intermediate buffer during stage 1
730c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3                                    ;// destination stride; 16 during stage 1
740c1bc742181ded4930842b46e9507372f0b1b963James Dong
750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare inner loop registers
;// Many names below alias the same physical register. Each alias is only valid
;// while the previous owner of that register is dead, so instruction order matters.
760c1bc742181ded4930842b46e9507372f0b1b963James DongAcc0            RN 4
770c1bc742181ded4930842b46e9507372f0b1b963James DongAcc1            RN 5
780c1bc742181ded4930842b46e9507372f0b1b963James DongAcc2            RN 6
790c1bc742181ded4930842b46e9507372f0b1b963James DongAcc3            RN 7
800c1bc742181ded4930842b46e9507372f0b1b963James Dong
810c1bc742181ded4930842b46e9507372f0b1b963James DongValA            RN 4                                    ;// shares r4 with Acc0
820c1bc742181ded4930842b46e9507372f0b1b963James DongValB            RN 5                                    ;// shares r5 with Acc1/Temp5
830c1bc742181ded4930842b46e9507372f0b1b963James DongValC            RN 6                                    ;// shares r6 with Acc2
840c1bc742181ded4930842b46e9507372f0b1b963James DongValD            RN 7                                    ;// shares r7 with Acc3/Temp4
850c1bc742181ded4930842b46e9507372f0b1b963James DongValE            RN 8                                    ;// shares r8 with pInterBuf (copied to pDst before first use)
860c1bc742181ded4930842b46e9507372f0b1b963James DongValF            RN 9
870c1bc742181ded4930842b46e9507372f0b1b963James DongValG            RN 12                                   ;// shares r12 with Temp3
880c1bc742181ded4930842b46e9507372f0b1b963James DongValH            RN 14                                   ;// r14 (lr) used as a data register
890c1bc742181ded4930842b46e9507372f0b1b963James DongValI            RN 1                                    ;// shares r1 with srcStep/Temp2
900c1bc742181ded4930842b46e9507372f0b1b963James Dong
910c1bc742181ded4930842b46e9507372f0b1b963James DongTemp1           RN 3                                    ;// shares r3 with dstStep
920c1bc742181ded4930842b46e9507372f0b1b963James DongTemp2           RN 1                                    ;// shares r1 with srcStep/ValI
930c1bc742181ded4930842b46e9507372f0b1b963James DongTemp3           RN 12
940c1bc742181ded4930842b46e9507372f0b1b963James DongTemp4           RN 7                                    ;// not referenced in the code below
950c1bc742181ded4930842b46e9507372f0b1b963James DongTemp5           RN 5                                    ;// not referenced in the code below
960c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]; not referenced in the code below
970c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
980c1bc742181ded4930842b46e9507372f0b1b963James DongCounter         RN 11
990c1bc742181ded4930842b46e9507372f0b1b963James DongpInterBuf       RN 8
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Stage-2 names: each word holds two 16-bit stage-1 sums of one column, [hi : lo]
1010c1bc742181ded4930842b46e9507372f0b1b963James DongValCA           RN 8                                    ;// [c : a]
1020c1bc742181ded4930842b46e9507372f0b1b963James DongValDB           RN 9                                    ;// [d : b]
1030c1bc742181ded4930842b46e9507372f0b1b963James DongValGE           RN 10                                   ;// [g : e]; shares r10 with r0x00ff00ff
1040c1bc742181ded4930842b46e9507372f0b1b963James DongValHF           RN 11                                   ;// [h : f]; shares r11 with Counter
1050c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00140001     RN 12                                   ;// coefficient pair [20 : 1]
1060c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0014fffb     RN 14                                   ;// coefficient pair [20 : -5]
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong
1080c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0001fc00     RN 11                                   ;// bias-removal constant; shares r11 with Counter/ValHF
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong
1100c1bc742181ded4930842b46e9507372f0b1b963James DongAccx            RN 8                                    ;// previous-pass accumulator reloaded from the stack
1110c1bc742181ded4930842b46e9507372f0b1b963James DongAccy            RN 9                                    ;// previous-pass accumulator reloaded from the stack
1120c1bc742181ded4930842b46e9507372f0b1b963James DongTemp6           RN 14
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;//
        ;// Stage 1: 6-tap filter along each source row, results to the intermediate buffer
        ;//
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      pDst, dstStep, ppDstArgs            ;// park the caller's pDst/dstStep; r2/r3 are redirected below
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pDst, pInterBuf                     ;// stage 1 writes to the intermediate buffer passed in r8
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         dstStep, #16                        ;// intermediate-buffer line pitch: 16 bytes
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Counter = 4. The loop ends on BPL, so the body runs 5 times (Counter = 4..0), two source rows per pass
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #4
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       dstStep, pDstStep                   ;// r3 is clobbered as Temp1 inside the loop
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       srcStep, pSrcStep                   ;// r1 is clobbered as Temp2/ValI inside the loop
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong
1250c1bc742181ded4930842b46e9507372f0b1b963James DongHeightLoop
1260c1bc742181ded4930842b46e9507372f0b1b963James DongNextTwoRowsLoop                                         ;// same address as HeightLoop; not branched to in the code below
        ;// Fetch nine consecutive bytes a..i from two adjacent rows (suffix 0 = first row, 1 = second row)
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValH, [pSrc, srcStep]                   ;// Load  [h1 g1 f1 e1]
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValE, [pSrc], #4                        ;// Load  [h0 g0 f0 e0]
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    Temp2, [pSrc, srcStep]                  ;// Load the single byte i1 of row 1 (overwrites srcStep in r1)
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    Temp1, [pSrc], #-8                      ;// Load the single byte i0 of row 0; rewind pSrc to the row start
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Interleave the two rows so each 16-bit lane can carry one row: low lane = row 0, high lane = row 1
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// In the per-lane notes below, "+k*255" is the bias carried by each 16-bit lane.
        ;// Every finished accumulator ends up as (filter sum + 16*255).
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc0
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// Temp1 = c + d                 (+255)
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// Temp3 = e + b                 (+255)
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp3, Temp1, LSL #2             ;// Temp1 = 4*(c+d) - (b+e)       (+3*255)
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// Acc0  = a + f                 (+255); ValA (r4) is dead from here
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 = 20*(c+d) - 5*(b+e)    (+15*255)
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc0, Acc0, Temp1                       ;// Acc0 complete                 (+16*255)
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc1
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValE, ValD, ROR #8               ;// Temp1 = e + d                 (+255)
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp3, ValC, ValF, ROR #8               ;// Temp3 = c + f                 (+255)
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp3, Temp1, LSL #2             ;// Temp1 = 4*(d+e) - (c+f)       (+3*255)
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 = 20*(d+e) - 5*(c+f)    (+15*255)
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc1, ValG, ValB, ROR #8                ;// Acc1  = g + b                 (+255); ValB (r5) is dead from here
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc1, Acc1, Temp1                       ;// Acc1 complete                 (+16*255)
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// Acc2  = c + h                 (+255); ValC (r6) is dead from here
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc2
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValG, ValD, ROR #8               ;// Temp1 = g + d                 (+255)
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// Acc3  = i + d                 (+255); ValD (r7) is dead from here
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp2, ValE, ValF, ROR #8               ;// Temp2 = e + f                 (+255); ValI (r1) is dead from here
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp1, Temp2, LSL #2             ;// Temp1 = 4*(e+f) - (d+g)       (+3*255)
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp2, ValG, ValF, ROR #8               ;// Temp2 = g + f                 (+255), prepared for Acc3
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 = 20*(e+f) - 5*(d+g)    (+15*255)
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc2, Acc2, Temp1                       ;// Acc2 complete                 (+16*255)
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc3
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValE, ValH, ROR #8               ;// Temp1 = e + h                 (+255)
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp1, Temp2, LSL #2             ;// Temp1 = 4*(f+g) - (e+h)       (+3*255)
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 = 20*(f+g) - 5*(e+h)    (+15*255)
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc3, Acc3, Temp1                       ;// Acc3 complete                 (+16*255)
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   dstStep, pDstStep                       ;// r3 was used as Temp1; restore the buffer pitch
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   srcStep, pSrcStep                       ;// r1 was used as Temp2/ValI; restore the source stride
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// If Counter is even store Acc0-Acc3 in a temporary buffer
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in the intermediate buf
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        ANDS        Temp3, Counter, #1                  ;// Z is set when Counter is even
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ         NoProcessing
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Packing previous and current Acc0-Acc3 values
        ;// With the previous pass holding rows n (low lane) and n+1 (high lane) and the current
        ;// pass holding rows n+2 and n+3, each column is regrouped as [row n+2 : row n] on one
        ;// 16-byte line and [row n+3 : row n+1] on the next line ([hi : lo] halfwords).
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      Accx, Accy, pTempResult1            ;// Accx/Accy = Acc0/Acc1 stashed by the previous pass
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accx, Acc0, LSL #16          ;// Temp6 = [Acc0.lo : Accx.lo] = first column, [row n+2 : row n]
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc0, Acc0, Accx, ASR #16           ;// Acc0  = [Acc0.hi : Accx.hi] = first column, [row n+3 : row n+1]
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc0, [pDst, dstStep]               ;// odd-row pair goes one line (16 bytes) further on
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #4                   ;// even-row pair; advance to the next column slot
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accy, Acc1, LSL #16          ;// second column, [row n+2 : row n]
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc1, Acc1, Accy, ASR #16            ;// second column, [row n+3 : row n+1]
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      Accx, Accy, pTempResult2            ;// Accx/Accy = Acc2/Acc3 stashed by the previous pass
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc1, [pDst, dstStep]
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #4
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accx, Acc2, LSL #16          ;// third column, [row n+2 : row n]
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc2, Acc2, Accx, ASR #16            ;// third column, [row n+3 : row n+1]
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc2, [pDst, dstStep]
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #4
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accy, Acc3, LSL #16          ;// fourth column, [row n+2 : row n]
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc3, Acc3, Accy, ASR #16            ;// fourth column, [row n+3 : row n+1]
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc3, [pDst, dstStep]
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #-12                 ;// store, then rewind pDst to the start of the line
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, dstStep, LSL #1         ;// skip the two lines just written
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           AfterStore
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong
2140c1bc742181ded4930842b46e9507372f0b1b963James DongNoProcessing
        ;// Even pass: stash the four accumulators until the next pass pairs them up
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      Acc0, Acc1, pTempResult1
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      Acc2, Acc3, pTempResult2
2170c1bc742181ded4930842b46e9507372f0b1b963James DongAfterStore
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1                ;// Loop till height is 10 (5 passes of 2 rows)
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, srcStep, LSL #1         ;// advance two source rows (ADD leaves the flags from SUBS intact)
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        BPL         HeightLoop
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// The last pass (Counter = 0) took the even path, so its accumulators are still in
        ;// registers. Store them unpaired as the fifth line; stage 2 reads only the low
        ;// halfword of each of these words (the LDRH below).
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc0, [pDst], #4                    ;//[0 a1 0 a0]
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc1, [pDst], #4
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc2, [pDst], #4
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc3, [pDst], #-12                  ;// store, then rewind pDst to the start of the fifth line
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Stage 2: second 6-tap pass over the intermediate buffer, using multiply-accumulate
        ;// NOTE(review): the original heading called this "Horizontal interpolation", but as
        ;// written each pass combines nine sums that came from different source rows of one
        ;// column and writes its four result bytes dstStep apart - confirm the intended naming.
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pDst, dstStep, LSL #2         ;// pSrc = start of the intermediate buffer (4 lines of 16 bytes back)
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         srcStep, #16                        ;// line pitch of the intermediate buffer
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      pDst, dstStep, ppDstArgs            ;// restore the caller's pDst/dstStep
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #4                         ;// four passes, one output column each
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0014fffb, =0x0014fffb            ;// halfword pair [20 : -5]
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00140001, =0x00140001            ;// halfword pair [20 : 1]
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong
2390c1bc742181ded4930842b46e9507372f0b1b963James DongHeightLoop1
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Counter, pCounter                   ;// r11 is about to be reused as ValHF
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Words are [hi : lo] halfword pairs of stage-1 sums for the current column; a..i
        ;// now name the nine sums of that column in source-row order.
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValCA, [pSrc], srcStep               ;// Load  [0 c 0 a]
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValDB, [pSrc], srcStep               ;// Load  [0 d 0 b]
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValGE, [pSrc], srcStep               ;// Load  [0 g 0 e]
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValHF, [pSrc], srcStep               ;// Load  [0 h 0 f]
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
        ;// The X forms multiply each data halfword by the opposite half of the coefficient word.
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc0, ValCA, r0x00140001            ;// Acc0  = 20*c + a
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc1, ValDB, r0x00140001            ;// Acc1  = 20*d + b
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX      Acc2, ValGE, r0x0014fffb            ;// Acc2  = 20*e - 5*g
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc3, ValGE, r0x0014fffb            ;// Acc3  = 20*g - 5*e
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0      ;// Acc0 += 20*d - 5*b
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc1, ValGE, r0x00140001, Acc1      ;// Acc1 += 20*e + g
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc2, ValHF, r0x00140001, Acc2      ;// Acc2 += 20*f + h
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3      ;// Acc3 += 20*f - 5*h
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0      ;// Acc0 += -5*e
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1      ;// Acc1 += -5*c
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc2, ValCA, r0x00140001, Acc2      ;// Acc2 += c
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc3, ValDB, r0x00140001, Acc3      ;// Acc3 += d
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRH        ValCA, [pSrc], #4                   ;// ValCA = i (low halfword of the fifth line); a and c are no longer needed
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc0, ValHF, r0x00140001, Acc0      ;// Acc0 += f
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1      ;// Acc1 += -5*f
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2      ;// Acc2 += -5*d
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc3, ValCA, r0x00140001, Acc3      ;// Acc3 += i
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Each input carried a bias of 16*255 and the taps sum to 32, so the accumulated bias
        ;// is 0xff*16*32. Subtracting 512 less than that leaves +512 as the rounding term for
        ;// the 10-bit shift below.
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0001fc00, =0x0001fc00            ;// (0xff * 16 * 32) - 512; overwrites ValHF in r11
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc0, Acc0, r0x0001fc00
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc1, Acc1, r0x0001fc00
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc2, Acc2, r0x0001fc00
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc3, Acc3, r0x0001fc00
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Clamp to 18 unsigned bits so that the shift by 10 yields a value in 0..255
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc0, #18, Acc0
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc1, #18, Acc1
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc2, #18, Acc2
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc3, #18, Acc3
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong
        ;// Scale down and write the four results one dstStep apart
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc0, Acc0, LSR #10
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc0, [pDst], dstStep
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc1, Acc1, LSR #10
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc1, [pDst], dstStep
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc2, Acc2, LSR #10
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc2, [pDst], dstStep
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc3, Acc3, LSR #10
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc3, [pDst], dstStep
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       Counter, pCounter                   ;// r11 held ValHF and then the bias constant; restore the counter
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, dstStep, LSL #2         ;// back to the first of the four bytes just written
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #2         ;// 68 bytes forward, 64 back: net +4, the next column's words
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, #1                      ;// next output column
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         HeightLoop1
3010c1bc742181ded4930842b46e9507372f0b1b963James DongEnd
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, #4                      ;// undo the four single-byte advances: pDst is back at its entry value
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #16                     ;// undo the four 4-byte advances: pSrc = start of the intermediate buffer
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF                                               ;// end of the ARM1136JS-only section
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong
311