armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
30c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
40c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
50c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
90c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
100c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
110c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
120c1bc742181ded4930842b46e9507372f0b1b963James Dong
130c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
140c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
150c1bc742181ded4930842b46e9507372f0b1b963James Dong
160c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
170c1bc742181ded4930842b46e9507372f0b1b963James Dong
180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
190c1bc742181ded4930842b46e9507372f0b1b963James Dong
200c1bc742181ded4930842b46e9507372f0b1b963James Dong
210c1bc742181ded4930842b46e9507372f0b1b963James Dong
220c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
230c1bc742181ded4930842b46e9507372f0b1b963James Dong
240c1bc742181ded4930842b46e9507372f0b1b963James Dong
250c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 ppDstArgs, 8                           ;// Slot for the caller's pDst/dstStep pair (written with M_STRD, read back with M_LDRD)
260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pTempResult1, 8                        ;// Slot for Acc0/Acc1 of an even-numbered first-pass iteration
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC8 pTempResult2, 8                        ;// Slot for Acc2/Acc3 of an even-numbered first-pass iteration
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppSrc, 4                               ;// Not referenced by the code in this file
290c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 ppDst, 4                               ;// Not referenced by the code in this file
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pDstStep, 4                            ;// Spill slot for dstStep (r3 is reused as Temp1 in the first pass)
310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pSrcStep, 4                            ;// Spill slot for srcStep (r1 is reused as ValI/Temp2 in the first pass)
320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ALLOC4 pCounter, 4                            ;// Spill slot for Counter (r11 is reused as ValHF/r0x0001fc00 in the second pass)
330c1bc742181ded4930842b46e9507372f0b1b963James Dong
340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function:
360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//     armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// be aligned.
400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers used as input for this function
420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0 = source pointer, r1 = source step, r2 = destination pointer, r3 = destination step,
430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r8 = intermediate-buffer pointer
440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers preserved for top level function
450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r2,r3,r4,r5,r6,r14
460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers modified by the function
480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0,r1,r7,r8,r9,r10,r11,r12
490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output registers
510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// None. On return r2,r3 hold their entry values; r0 points at the intermediate buffer and r1 = 16
520c1bc742181ded4930842b46e9507372f0b1b963James Dong
530c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6
540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Two passes: pass 1 applies the 6-tap filter along source rows into the intermediate buffer, pass 2 applies it down that buffer's columns into pDst
550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
560c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0
570c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1
580c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2
590c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3
600c1bc742181ded4930842b46e9507372f0b1b963James Dong
610c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare inner loop registers (Acc0-Acc3 share r4-r7 with ValA-ValD)
620c1bc742181ded4930842b46e9507372f0b1b963James DongAcc0            RN 4
630c1bc742181ded4930842b46e9507372f0b1b963James DongAcc1            RN 5
640c1bc742181ded4930842b46e9507372f0b1b963James DongAcc2            RN 6
650c1bc742181ded4930842b46e9507372f0b1b963James DongAcc3            RN 7
660c1bc742181ded4930842b46e9507372f0b1b963James Dong
670c1bc742181ded4930842b46e9507372f0b1b963James DongValA            RN 4
680c1bc742181ded4930842b46e9507372f0b1b963James DongValB            RN 5
690c1bc742181ded4930842b46e9507372f0b1b963James DongValC            RN 6
700c1bc742181ded4930842b46e9507372f0b1b963James DongValD            RN 7
710c1bc742181ded4930842b46e9507372f0b1b963James DongValE            RN 8                                    ;// Same register as pInterBuf, which is copied to pDst before the first loop
720c1bc742181ded4930842b46e9507372f0b1b963James DongValF            RN 9
730c1bc742181ded4930842b46e9507372f0b1b963James DongValG            RN 12
740c1bc742181ded4930842b46e9507372f0b1b963James DongValH            RN 14
750c1bc742181ded4930842b46e9507372f0b1b963James DongValI            RN 1                                    ;// Same register as srcStep, hence the pSrcStep spill/reload
760c1bc742181ded4930842b46e9507372f0b1b963James Dong
770c1bc742181ded4930842b46e9507372f0b1b963James DongTemp1           RN 3                                    ;// Same register as dstStep, hence the pDstStep spill/reload
780c1bc742181ded4930842b46e9507372f0b1b963James DongTemp2           RN 1                                    ;// Same register as srcStep
790c1bc742181ded4930842b46e9507372f0b1b963James DongTemp3           RN 12
800c1bc742181ded4930842b46e9507372f0b1b963James DongTemp4           RN 7                                    ;// Alias not referenced in this function
810c1bc742181ded4930842b46e9507372f0b1b963James DongTemp5           RN 5                                    ;// Alias not referenced in this function
820c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]; alias not referenced in this function
830c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
840c1bc742181ded4930842b46e9507372f0b1b963James DongCounter         RN 11
850c1bc742181ded4930842b46e9507372f0b1b963James DongpInterBuf       RN 8
860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Second-pass aliases; they reuse r8-r14, which are free once the first pass has finished
870c1bc742181ded4930842b46e9507372f0b1b963James DongValCA           RN 8
880c1bc742181ded4930842b46e9507372f0b1b963James DongValDB           RN 9
890c1bc742181ded4930842b46e9507372f0b1b963James DongValGE           RN 10
900c1bc742181ded4930842b46e9507372f0b1b963James DongValHF           RN 11                                   ;// Same register as Counter, hence the pCounter spill/reload
910c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00140001     RN 12
920c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0014fffb     RN 14
930c1bc742181ded4930842b46e9507372f0b1b963James Dong
940c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0001fc00     RN 11                                   ;// Same register as Counter/ValHF; loaded only after ValHF's last use
950c1bc742181ded4930842b46e9507372f0b1b963James Dong
960c1bc742181ded4930842b46e9507372f0b1b963James DongAccx            RN 8
970c1bc742181ded4930842b46e9507372f0b1b963James DongAccy            RN 9
980c1bc742181ded4930842b46e9507372f0b1b963James DongTemp6           RN 14
990c1bc742181ded4930842b46e9507372f0b1b963James Dong
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      pDst, dstStep, ppDstArgs            ;// Save the caller's pDst/dstStep; r2/r3 address the intermediate buffer during pass 1
1010c1bc742181ded4930842b46e9507372f0b1b963James Dong
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         pDst, pInterBuf                     ;// Pass 1 writes into the intermediate buffer
1030c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         dstStep, #16                        ;// Intermediate row stride: four 32-bit words, one per output column
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Counter runs 4,3,2,1,0: five iterations of two source rows each (the loop exits when SUBS goes negative)
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #4
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       dstStep, pDstStep                   ;// Spill: r3 is used as Temp1 inside the loop
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       srcStep, pSrcStep                   ;// Spill: r1 is used as ValI/Temp2 inside the loop
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Pass 1: each iteration filters two source rows at once, row 0 in the low 16-bit lane and row 1 in the high lane
1110c1bc742181ded4930842b46e9507372f0b1b963James DongHeightLoop
1120c1bc742181ded4930842b46e9507372f0b1b963James DongNextTwoRowsLoop
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValH, [pSrc, srcStep]                   ;// Load  [h1 g1 f1 e1]
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     ValE, [pSrc], #4                        ;// Load  [h0 g0 f0 e0]
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    Temp2, [pSrc, srcStep]                  ;// Load the single byte i1 (ninth pixel of row 1)
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRB    Temp1, [pSrc], #-8                      ;// Load the single byte i0 (ninth pixel of row 0); rewind pSrc by 8 to the row start
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc0
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// Temp1 = c + d
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// Temp3 = e + b
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp3, Temp1, LSL #2             ;// Temp1 = 4*(c + d) - (b + e)
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// Acc0 = a + f (overwrites ValA, same register)
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 *= 5 -> 20*(c + d) - 5*(b + e)
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc0, Acc0, Temp1
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc1
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValE, ValD, ROR #8               ;// Temp1 = e + d
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp3, ValC, ValF, ROR #8               ;// Temp3 = c + f
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp3, Temp1, LSL #2             ;// Temp1 = 4*(d + e) - (c + f)
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 *= 5
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc1, ValG, ValB, ROR #8                ;// Acc1 = g + b (overwrites ValB, same register)
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc1, Acc1, Temp1
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// Acc2 = c + h (overwrites ValC, same register)
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc2
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValG, ValD, ROR #8               ;// Temp1 = g + d
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// Acc3 = i + d (overwrites ValD, same register)
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp2, ValE, ValF, ROR #8               ;// Temp2 = e + f (overwrites ValI, same register)
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp1, Temp2, LSL #2             ;// Temp1 = 4*(e + f) - (d + g)
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp2, ValG, ValF, ROR #8               ;// Temp2 = g + f, used for Acc3 below
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 *= 5
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc2, Acc2, Temp1
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate Acc3
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16 Temp1, ValE, ValH, ROR #8               ;// Temp1 = e + h
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB     Temp1, Temp1, Temp2, LSL #2             ;// Temp1 = 4*(f + g) - (e + h)
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Temp1, Temp1, Temp1, LSL #2             ;// Temp1 *= 5
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     Acc3, Acc3, Temp1
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   dstStep, pDstStep                       ;// Reload r3, which was used as Temp1 above
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   srcStep, pSrcStep                       ;// Reload r1, which was used as ValI/Temp2 above
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// If Counter is even, park Acc0-Acc3 in pTempResult1/pTempResult2
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// If Counter is odd, interleave Acc0-Acc3 with the parked Acc0-Acc3 and store both in the intermediate buf
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        ANDS        Temp3, Counter, #1
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        BEQ         NoProcessing
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Packing previous and current Acc0-Acc3 values
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      Accx, Accy, pTempResult1            ;// Accx/Accy = parked Acc0/Acc1 of the previous row pair
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accx, Acc0, LSL #16          ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0]
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc0, Acc0, Accx, ASR #16           ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2]
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc0, [pDst, dstStep]
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #4
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accy, Acc1, LSL #16          ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0]
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc1, Acc1, Accy, ASR #16            ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2]
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      Accx, Accy, pTempResult2            ;// Accx/Accy = parked Acc2/Acc3 of the previous row pair
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc1, [pDst, dstStep]
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #4
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accx, Acc2, LSL #16          ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0]
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc2, Acc2, Accx, ASR #16            ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2]
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc2, [pDst, dstStep]
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #4
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHBT       Temp6, Accy, Acc3, LSL #16          ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0]
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        PKHTB       Acc3, Acc3, Accy, ASR #16            ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2]
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc3, [pDst, dstStep]
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Temp6, [pDst], #-12                 ;// Undo the three #4 post-increments: pDst is back at the row start
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, dstStep, LSL #1         ;// Advance past the two intermediate rows just written
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        B           AfterStore
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong
2000c1bc742181ded4930842b46e9507372f0b1b963James DongNoProcessing
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      Acc0, Acc1, pTempResult1            ;// Park this row pair until the next (odd) iteration
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRD      Acc2, Acc3, pTempResult2
2030c1bc742181ded4930842b46e9507372f0b1b963James DongAfterStore
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1                ;// Five iterations (Counter 4..0) x 2 rows = 10 source rows
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, srcStep, LSL #1         ;// Advance two source rows; ADD (no S suffix) keeps the SUBS flags for BPL
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        BPL         HeightLoop
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// The final iteration (Counter = 0, even) has no partner pair: store it un-interleaved as the fifth intermediate row
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc0, [pDst], #4                    ;//[0 a1 0 a0]
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc1, [pDst], #4
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc2, [pDst], #4
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        STR         Acc3, [pDst], #-12
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Second pass: 6-tap filter down each column of the intermediate buffer, using multiply-accumulate
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pDst, dstStep, LSL #2         ;// pSrc = start of the intermediate buffer (four rows above pDst)
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         srcStep, #16                        ;// Source stride is now the intermediate row stride
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDRD      pDst, dstStep, ppDstArgs            ;// Restore the caller's pDst/dstStep saved at entry
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #4                         ;// One iteration per output column
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0014fffb, =0x0014fffb            ;// Halfwords [20, -5]
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00140001, =0x00140001            ;// Halfwords [20, 1]
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong
2250c1bc742181ded4930842b46e9507372f0b1b963James DongHeightLoop1
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Counter, pCounter                   ;// Spill: r11 is reused as ValHF and r0x0001fc00 below
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValCA, [pSrc], srcStep               ;// Load  [0 c 0 a]
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValDB, [pSrc], srcStep               ;// Load  [0 d 0 b]
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValGE, [pSrc], srcStep               ;// Load  [0 g 0 e]
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValHF, [pSrc], srcStep               ;// Load  [0 h 0 f]
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i      (i is loaded by the LDRH below)
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc0, ValCA, r0x00140001            ;// Acc0  = [0 c 0 a] * [0 20 0 1]   = a + 20*c
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc1, ValDB, r0x00140001            ;// Acc1  = [0 d 0 b] * [0 20 0 1]   = b + 20*d
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUADX      Acc2, ValGE, r0x0014fffb            ;// Acc2  = [0 g 0 e] x [0 20 0 -5]  = 20*e - 5*g (X: constant halves swapped)
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMUAD       Acc3, ValGE, r0x0014fffb            ;// Acc3  = [0 g 0 e] * [0 20 0 -5]  = -5*e + 20*g
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0      ;// Acc0 += [0 d 0 b] * [0 20 0 -5]  = -5*b + 20*d
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc1, ValGE, r0x00140001, Acc1      ;// Acc1 += [0 g 0 e] x [0 20 0 1]   = 20*e + g
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc2, ValHF, r0x00140001, Acc2      ;// Acc2 += [0 h 0 f] x [0 20 0 1]   = 20*f + h
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3      ;// Acc3 += [0 h 0 f] x [0 20 0 -5]  = 20*f - 5*h
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0      ;// Acc0 += e * -5  (bottom of [0 g 0 e])
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1      ;// Acc1 += c * -5  (top of [0 c 0 a])
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc2, ValCA, r0x00140001, Acc2      ;// Acc2 += c * 1   (top of [0 c 0 a])
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc3, ValDB, r0x00140001, Acc3      ;// Acc3 += d * 1   (top of [0 d 0 b])
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDRH        ValCA, [pSrc], #4                   ;// Load i: one halfword from the fifth intermediate row (replaces [0 c 0 a]); step to the next column word
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc0, ValHF, r0x00140001, Acc0      ;// Acc0 += f * 1   (bottom of [0 h 0 f])
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1      ;// Acc1 += f * -5  (bottom of [0 h 0 f])
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2      ;// Acc2 += d * -5  (top of [0 d 0 b])
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        SMLABB      Acc3, ValCA, r0x00140001, Acc3      ;// Acc3 += i * 1   (ValCA now holds i)
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0001fc00, =0x0001fc00            ;// (0xff * 16 * 32) - 512
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc0, Acc0, r0x0001fc00             ;// Remove the accumulated 255 offset; the remaining +512 rounds the >> 10 below
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc1, Acc1, r0x0001fc00
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc2, Acc2, r0x0001fc00
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         Acc3, Acc3, r0x0001fc00
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc0, #18, Acc0                     ;// Clamp to [0, 2^18 - 1] so the >> 10 below yields 0..255
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc1, #18, Acc1
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc2, #18, Acc2
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT        Acc3, #18, Acc3
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc0, Acc0, LSR #10                 ;// Divide by 1024 (filter coefficients sum to 32 in each of the two passes)
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc0, [pDst], dstStep               ;// Write one output byte, then step down one destination row
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc1, Acc1, LSR #10
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc1, [pDst], dstStep
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc2, Acc2, LSR #10
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc2, [pDst], dstStep
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Acc3, Acc3, LSR #10
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STRB      Acc3, [pDst], dstStep
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       Counter, pCounter                   ;// Reload r11, which was used as ValHF/r0x0001fc00 above
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, dstStep, LSL #2         ;// Back up the four destination rows just written
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #2         ;// Back up four intermediate rows; the LDRH #4 step leaves pSrc on the next column
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pDst, pDst, #1                      ;// Next destination column
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         HeightLoop1
2870c1bc742181ded4930842b46e9507372f0b1b963James DongEnd
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pDst, pDst, #4                      ;// Undo the four column steps: pDst is back at its entry value
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, #16                     ;// Undo the four column steps: pSrc is left at the start of the intermediate buffer
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong
297