;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

270c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
330c1bc742181ded4930842b46e9507372f0b1b963James Dong
340c1bc742181ded4930842b46e9507372f0b1b963James Dong
350c1bc742181ded4930842b46e9507372f0b1b963James Dong
360c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF ARM1136JS
370c1bc742181ded4930842b46e9507372f0b1b963James Dong
380c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function header
390c1bc742181ded4930842b46e9507372f0b1b963James Dong
400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Function:
410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//     armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Implements vertical interpolation for a block of size 4x4. Input and output should
440c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// be aligned.
450c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
460c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers used as input for this function
470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0,r1,r2,r3 where r0,r2  input pointer and r1,r3 corresponding step size
480c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
490c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers preserved for top level function
500c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r0,r1,r2,r3,r4,r5,r6,r14
510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Registers modified by the function
530c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// r7,r8,r9,r10,r11,r12
540c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
550c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Output registers
560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// None. Function will preserve r0-r3
570c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6
580c1bc742181ded4930842b46e9507372f0b1b963James Dong
590c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
600c1bc742181ded4930842b46e9507372f0b1b963James DongpSrc            RN 0
610c1bc742181ded4930842b46e9507372f0b1b963James DongsrcStep         RN 1
620c1bc742181ded4930842b46e9507372f0b1b963James DongpDst            RN 2
630c1bc742181ded4930842b46e9507372f0b1b963James DongdstStep         RN 3
640c1bc742181ded4930842b46e9507372f0b1b963James Dong
650c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare inner loop registers
660c1bc742181ded4930842b46e9507372f0b1b963James DongValA            RN 5
670c1bc742181ded4930842b46e9507372f0b1b963James DongValA0           RN 4
680c1bc742181ded4930842b46e9507372f0b1b963James DongValA1           RN 5
690c1bc742181ded4930842b46e9507372f0b1b963James DongValAF0          RN 4
700c1bc742181ded4930842b46e9507372f0b1b963James DongValAF1          RN 5
710c1bc742181ded4930842b46e9507372f0b1b963James Dong
720c1bc742181ded4930842b46e9507372f0b1b963James DongValB            RN 11
730c1bc742181ded4930842b46e9507372f0b1b963James Dong
740c1bc742181ded4930842b46e9507372f0b1b963James DongValC            RN 5
750c1bc742181ded4930842b46e9507372f0b1b963James DongValC0           RN 4
760c1bc742181ded4930842b46e9507372f0b1b963James DongValC1           RN 5
770c1bc742181ded4930842b46e9507372f0b1b963James DongValCD0          RN 12
780c1bc742181ded4930842b46e9507372f0b1b963James DongValCD1          RN 14
790c1bc742181ded4930842b46e9507372f0b1b963James DongValCF0          RN 4
800c1bc742181ded4930842b46e9507372f0b1b963James DongValCF1          RN 5
810c1bc742181ded4930842b46e9507372f0b1b963James Dong
820c1bc742181ded4930842b46e9507372f0b1b963James DongValD            RN 10
830c1bc742181ded4930842b46e9507372f0b1b963James Dong
840c1bc742181ded4930842b46e9507372f0b1b963James DongValE            RN 7
850c1bc742181ded4930842b46e9507372f0b1b963James DongValE0           RN 6
860c1bc742181ded4930842b46e9507372f0b1b963James DongValE1           RN 7
870c1bc742181ded4930842b46e9507372f0b1b963James DongValEB0          RN 10
880c1bc742181ded4930842b46e9507372f0b1b963James DongValEB1          RN 11
890c1bc742181ded4930842b46e9507372f0b1b963James DongValED0          RN 6
900c1bc742181ded4930842b46e9507372f0b1b963James DongValED1          RN 7
910c1bc742181ded4930842b46e9507372f0b1b963James Dong
920c1bc742181ded4930842b46e9507372f0b1b963James DongValF            RN 10
930c1bc742181ded4930842b46e9507372f0b1b963James Dong
940c1bc742181ded4930842b46e9507372f0b1b963James DongValG            RN 14
950c1bc742181ded4930842b46e9507372f0b1b963James DongValG0           RN 12
960c1bc742181ded4930842b46e9507372f0b1b963James DongValG1           RN 14
970c1bc742181ded4930842b46e9507372f0b1b963James DongValGB0          RN 12
980c1bc742181ded4930842b46e9507372f0b1b963James DongValGB1          RN 14
990c1bc742181ded4930842b46e9507372f0b1b963James Dong
1000c1bc742181ded4930842b46e9507372f0b1b963James DongAcc0            RN 4
1010c1bc742181ded4930842b46e9507372f0b1b963James DongAcc1            RN 5
1020c1bc742181ded4930842b46e9507372f0b1b963James DongAcc2            RN 6
1030c1bc742181ded4930842b46e9507372f0b1b963James DongAcc3            RN 7
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong
1050c1bc742181ded4930842b46e9507372f0b1b963James DongTemp            RN 7
1060c1bc742181ded4930842b46e9507372f0b1b963James DongHeight          RN 3
1070c1bc742181ded4930842b46e9507372f0b1b963James DongStep            RN 6
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong
1090c1bc742181ded4930842b46e9507372f0b1b963James DongCounter         RN 8
1100c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x00ff00ff     RN 9                                        ;// [0 255 0 255] where 255 is offset
1110c1bc742181ded4930842b46e9507372f0b1b963James Dongr0x0fe00fe0     RN 10                                       ;// [0 (16*255 - 16) 0 (16*255 - 16)]
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV         Counter, #2
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong
1170c1bc742181ded4930842b46e9507372f0b1b963James DongTwoRowsLoop
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValC, [pSrc], srcStep                   ;// Load  [c3 c2 c1 c0]
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValD, [pSrc], srcStep                   ;// Load  [d3 d2 d1 d0]
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValE, [pSrc], srcStep                   ;// Load  [e3 e2 e1 e0]
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         pSrc, pSrc, srcStep, LSL #2
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValB, [pSrc]                            ;// Load  [b3 b2 b1 b0]
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValC0, r0x00ff00ff, ValC                ;// [0 c2 0 c0] + [0 255 0 255]
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValC1, r0x00ff00ff, ValC, ROR #8        ;// [0 c3 0 c1] + [0 255 0 255]
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValE0, r0x00ff00ff, ValE                ;// [0 e2 0 e0] + [0 255 0 255]
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValE1, r0x00ff00ff, ValE, ROR #8        ;// [0 e3 0 e1] + [0 255 0 255]
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCD0, ValC0, ValD                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCD1, ValC1, ValD, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValEB0, ValE0, ValB                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValCD0, ValEB0, ValCD0, LSL #2          ;// 4*(Off+C+D) - (Off+B+E)
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValD, [pSrc, srcStep, LSL #1]                       ;// Load  [d3 d2 d1 d0]
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValEB1, ValE1, ValB, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValCD1, ValEB1, ValCD1, LSL #2
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// One cycle stall
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValED0, ValE0, ValD                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValED1, ValE1, ValD, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValF, [pSrc, srcStep, LSL #2]           ;// Load  [f3 f2 f1 f0]
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR       ValB, [pSrc], srcStep                   ;// Load  [b3 b2 b1 b0]
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValCD0, ValCD0, ValCD0, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)]
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValCD1, ValCD1, ValCD1, LSL #2
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCF1, ValC1, ValF, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValCF0, ValC0, ValF                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValED1, ValCF1, ValED1, LSL #2
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB         ValA, pSrc, srcStep, LSL #1
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValA, [ValA]                            ;// Load  [a3 a2 a1 a0]
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong        RSB         ValED0, ValCF0, ValED0, LSL #2          ;// 4*(Off+E+D) - (Off+C+F)
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValED1, ValED1, ValED1, LSL #2
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         ValED0, ValED0, ValED0, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)]
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValA0, r0x00ff00ff, ValA                ;// [0 a2 0 a0] + [0 255 0 255]
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValA1, r0x00ff00ff, ValA, ROR #8        ;// [0 a3 0 a1] + [0 255 0 255]
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValAF0, ValA0, ValF                     ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValAF1, ValA1, ValF, ROR #8             ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         r0x0fe00fe0, =0x0fe00fe0                ;// [0 255 0 255] 255 is offset to avoid negative results
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc1, ValCD1, ValAF1
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR         ValG, [pSrc, srcStep, LSL #2]           ;// Load  [g3 g2 g1 g0]
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc0, ValCD0, ValAF0                    ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB16     Acc1, Acc1, r0x0fe00fe0                 ;// Acc1 -= (16*Off - 16)
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB16     Acc0, Acc0, r0x0fe00fe0
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValG0, r0x00ff00ff, ValG                ;// [0 g2 0 g0] + [0 255 0 255]
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValG1, r0x00ff00ff, ValG, ROR #8        ;// [0 g3 0 g1] + [0 255 0 255]
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValGB0, ValG0, ValB                     ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        UXTAB16     ValGB1, ValG1, ValB, ROR #8             ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc2, ValED0, ValGB0                    ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         Acc3, ValED1, ValGB1
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB16     Acc3, Acc3, r0x0fe00fe0                 ;// Acc3 -= (16*Off - 16)
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB16     Acc2, Acc2, r0x0fe00fe0
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16      Acc1, #13, Acc1                         ;// Saturate to 8+5 = 13 bits
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16      Acc0, #13, Acc0
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16      Acc3, #13, Acc3
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        USAT16      Acc2, #13, Acc2
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND         Acc1, r0x00ff00ff, Acc1, LSR #5         ;// [0 a3 0 a1]
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND         Acc0, r0x00ff00ff, Acc0, LSR #5         ;// [0 a2 0 a0]
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR         Acc0, Acc0, Acc1, LSL #8                ;// [a3 a2 a1 a0]
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND         Acc3, r0x00ff00ff, Acc3, LSR #5         ;// [0 b3 0 b1]
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND         Acc2, r0x00ff00ff, Acc2, LSR #5         ;// [0 b2 0 b0]
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Acc0, [pDst], dstStep                   ;// Store result & adjust pointer
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR         Acc2, Acc2, Acc3, LSL #8                ;// [b3 b2 b1 b0]
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_STR       Acc2, [pDst], dstStep                   ;// Store result & adjust pointer
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD         pSrc, pSrc, srcStep, LSL #1
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUBS        Counter, Counter, #1
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        BGT         TwoRowsLoop
1900c1bc742181ded4930842b46e9507372f0b1b963James DongEnd
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pDst, pDst, dstStep, LSL #2
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        SUB     pSrc, pSrc, srcStep, LSL #2
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
19978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar
200