; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorVerQuarter
;--            function
;--
;-------------------------------------------------------------------------------


    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE

;// h264bsdInterpolateHorVerQuarter register allocation
;//
;// Several names alias the same physical register; each alias is only
;// meaningful in the phase of the routine noted next to it.

ref         RN 0    ;// source pointer (advanced by y0*width+x0 after setup)

mb          RN 1    ;// output pointer, reloaded from the stack before each pass
buff        RN 1    ;// stack scratch buffer address set up before BL h264bsdFillBlock

count       RN 2    ;// packed loop counters plus width (see "pack values" comment)
x0          RN 2    ;// x0 coordinate during the bounds checks at entry

y0          RN 3    ;// y0 coordinate during the bounds checks at entry
x_2_0       RN 3    ;// horizontal pass: unpacked pixel pair
res         RN 3    ;// vertical pass: packed result word

x_3_1       RN 4    ;// horizontal pass: unpacked pixel pair
tmp1        RN 4    ;// vertical pass: row word

height      RN 5    ;// picture height loaded from the stack
x_6_4       RN 5    ;// horizontal pass: unpacked pixel pair
tmp2        RN 5    ;// vertical pass: row word

partW       RN 6    ;// partWidth loaded from the stack
x_7_5       RN 6    ;// horizontal pass: unpacked pixel pair
tmp3        RN 6    ;// vertical pass: row word

partH       RN 7    ;// partHeight loaded from the stack
tmp4        RN 7    ;// accumulator / row word

tmp5        RN 8    ;// accumulator / row word

tmp6        RN 9    ;// accumulator / row word

tmpa        RN 10   ;// accumulator / scratch

mult_20_01  RN 11   ;// horizontal pass: constant 0x00140001 (halfwords 20 and 1)
tmpb        RN 11   ;// vertical pass: accumulator / scratch

mult_20_m5  RN 12   ;// horizontal pass: constant 0x0014FFFB (halfwords 20 and -5)
width       RN 12   ;// picture width loaded from the stack

plus16      RN 14   ;// rounding constant; r14 is lr, which is pushed on entry


;// function exports and imports

    IMPORT  h264bsdFillBlock        ;// called on the do_fill path

    EXPORT  h264bsdInterpolateHorVerQuarter

;// Horizontal filter approach
;//
;// Basic idea in horizontal filtering is to adjust coefficients
;// like below. Calculation is done with 16-bit maths.
;//
;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_0 = 20 1 20 -5 -5 1 890c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_1 = -5 20 1 1 20 -5 900c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_2 = 1 -5 -5 20 1 20 910c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_3 = 1 20 -5 -5 20 1 920c1bc742181ded4930842b46e9507372f0b1b963James Dong 930c1bc742181ded4930842b46e9507372f0b1b963James Dong 940c1bc742181ded4930842b46e9507372f0b1b963James Dongh264bsdInterpolateHorVerQuarter 950c1bc742181ded4930842b46e9507372f0b1b963James Dong STMFD sp!, {r0-r11, lr} 960c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB sp, sp, #0x1e4 970c1bc742181ded4930842b46e9507372f0b1b963James Dong 980c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP x0, #0 990c1bc742181ded4930842b46e9507372f0b1b963James Dong BLT do_fill ;// (x0 < 0) 1000c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR partW, [sp,#0x220] ;// partWidth 1010c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR width, [sp,#0x218] ;// width 1020c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, x0, partW ;// (x0+partWidth) 1030c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, #5 ;// (x0+partW+5) 1040c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP tmpa, width 1050c1bc742181ded4930842b46e9507372f0b1b963James Dong BHI do_fill ;// (x0+partW)>width 1060c1bc742181ded4930842b46e9507372f0b1b963James Dong 1070c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP y0, #0 1080c1bc742181ded4930842b46e9507372f0b1b963James Dong BLT do_fill ;// (y0 < 0) 1090c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR partH, [sp,#0x224] ;// partHeight 1100c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR height, [sp,#0x21c] ;// height 1110c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmp5, y0, partH ;// (y0+partHeight) 1120c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmp5, tmp5, #5 ;// (y0+partH+5) 1130c1bc742181ded4930842b46e9507372f0b1b963James Dong CMP tmp5, height 
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong BLS skip_fill ;// no overfill needed 1150c1bc742181ded4930842b46e9507372f0b1b963James Dong 1160c1bc742181ded4930842b46e9507372f0b1b963James Dong 1170c1bc742181ded4930842b46e9507372f0b1b963James Dongdo_fill 1180c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR partH, [sp,#0x224] ;// partHeight 1190c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR partW, [sp,#0x220] ;// partWidth 1200c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR height, [sp,#0x21c] ;// height 1210c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmp5, partH, #5 ;// tmp5 = partH + 5 1220c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, partW, #5 ;// tmpa = partW + 5 1230c1bc742181ded4930842b46e9507372f0b1b963James Dong STMIB sp, {height, tmpa} ;// sp+4 = height, sp+8 = partWidth+5 1240c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR width, [sp,#0x218] ;// width 1250c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tmp5, [sp,#0xc] ;// sp+c = partHeight+5 1260c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tmpa, [sp,#0x10] ;// sp+10 = partWidth+5 1270c1bc742181ded4930842b46e9507372f0b1b963James Dong STR width, [sp,#0] ;// sp+0 = width 1280c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1] 1290c1bc742181ded4930842b46e9507372f0b1b963James Dong BL h264bsdFillBlock 1300c1bc742181ded4930842b46e9507372f0b1b963James Dong 1310c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV x0, #0 1320c1bc742181ded4930842b46e9507372f0b1b963James Dong STR x0,[sp,#0x1ec] ;// x0 = 0 1330c1bc742181ded4930842b46e9507372f0b1b963James Dong STR x0,[sp,#0x1f0] ;// y0 = 0 1340c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref,sp,#0x28 ;// ref = p1 1350c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tmpa, [sp,#0x218] ;// width = partWidth+5 1360c1bc742181ded4930842b46e9507372f0b1b963James Dong 1370c1bc742181ded4930842b46e9507372f0b1b963James Dong 
1380c1bc742181ded4930842b46e9507372f0b1b963James Dongskip_fill 1390c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR x0 ,[sp,#0x1ec] ;// x0 1400c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR y0 ,[sp,#0x1f0] ;// y0 1410c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR width, [sp,#0x218] ;// width 1420c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp6, [sp,#0x228] ;// horVerOffset 1430c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR mb, [sp, #0x1e8] ;// mb 1440c1bc742181ded4930842b46e9507372f0b1b963James Dong MLA tmp5, width, y0, x0 ;// y0*width+x0 1450c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref, ref, tmp5 ;// ref += y0*width+x0 1460c1bc742181ded4930842b46e9507372f0b1b963James Dong STR ref, [sp, #0x1e4] ;// store "ref" for vertical filtering 1470c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmp6, tmp6, #2 ;// calculate ref for horizontal filter 1480c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tmpa, #2 1490c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmp6, tmpa, tmp6, LSR #1 1500c1bc742181ded4930842b46e9507372f0b1b963James Dong MLA ref, tmp6, width, ref 1510c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref, ref, #8 ;// ref = ref+8 1520c1bc742181ded4930842b46e9507372f0b1b963James Dong 1530c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// pack values to count register 1540c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// [31:28] loop_x (partWidth-1) 1550c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// [27:24] loop_y (partHeight-1) 1560c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// [23:20] partWidth-1 1570c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// [19:16] partHeight-1 1580c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// [15:00] width 1590c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV count, width 1600c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB partW, partW, #1; 1610c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB partH, partH, #1; 
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmp5, partH, partW, LSL #4 1630c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, tmp5, LSL #16 1640c1bc742181ded4930842b46e9507372f0b1b963James Dong 1650c1bc742181ded4930842b46e9507372f0b1b963James Dong 1660c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR mult_20_01, = 0x00140001 ;// constant multipliers 1670c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR mult_20_m5, = 0x0014FFFB ;// constant multipliers 1680c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV plus16, #16 ;// constant for add 1690c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmp4, count, #0x000F0000 ;// partHeight-1 1700c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmp6, count, #0x00F00000 ;// partWidth-1 1710c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, tmp4, LSL #8 ;// partH-1 to lower part of top byte 1720c1bc742181ded4930842b46e9507372f0b1b963James Dong 1730c1bc742181ded4930842b46e9507372f0b1b963James Dong;// HORIZONTAL PART 1740c1bc742181ded4930842b46e9507372f0b1b963James Dong 1750c1bc742181ded4930842b46e9507372f0b1b963James Dongloop_y_hor 1760c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR x_3_1, [ref, #-8] 1770c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, tmp6, LSL #8 ;// partW-1 to upper part of top byte 1780c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR x_7_5, [ref, #-4] 1790c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_2_0, x_3_1 1800c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_3_1, x_3_1, ROR #8 1810c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_6_4, x_7_5 1820c1bc742181ded4930842b46e9507372f0b1b963James Dong 1830c1bc742181ded4930842b46e9507372f0b1b963James Dongloop_x_hor 1840c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_7_5, x_7_5, ROR #8 1850c1bc742181ded4930842b46e9507372f0b1b963James Dong 1860c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmp4, x_2_0, mult_20_01, 
plus16 1870c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmp6, x_2_0, mult_20_01, plus16 1880c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmp5, x_2_0, mult_20_m5, plus16 1890c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmpa, x_3_1, mult_20_01, plus16 1900c1bc742181ded4930842b46e9507372f0b1b963James Dong 1910c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmp4, x_3_1, mult_20_m5, tmp4 1920c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmp6, x_3_1, mult_20_m5, tmp6 1930c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmp5, x_3_1, mult_20_01, tmp5 1940c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR x_3_1, [ref], #4 1950c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmpa, x_6_4, mult_20_m5, tmpa 1960c1bc742181ded4930842b46e9507372f0b1b963James Dong 1970c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmp4, x_6_4, mult_20_m5, tmp4 1980c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmp6, x_6_4, mult_20_m5, tmp6 1990c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmp5, x_6_4, mult_20_01, tmp5 2000c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmpa, x_7_5, mult_20_m5, tmpa 2010c1bc742181ded4930842b46e9507372f0b1b963James Dong 2020c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmp4, x_7_5, mult_20_01, tmp4 2030c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_2_0, x_3_1 2040c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmp5, x_7_5, mult_20_m5, tmp5 2050c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmp6, x_7_5, mult_20_01, tmp6 2060c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmpa, x_2_0, mult_20_01, tmpa 2070c1bc742181ded4930842b46e9507372f0b1b963James Dong 2080c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tmp5, tmp5, ASR #5 2090c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tmp4, tmp4, ASR #5 2100c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT tmp5, tmp5, tmpa, LSL #(16-5) 
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT tmp4, tmp4, tmp6, LSL #(16-5) 2120c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmp5, #8, tmp5 2130c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmp4, #8, tmp4 2140c1bc742181ded4930842b46e9507372f0b1b963James Dong 2150c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS count, count, #4<<28 2160c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tmp4, tmp4, tmp5, LSL #8 2170c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tmp4, [mb], #4 2180c1bc742181ded4930842b46e9507372f0b1b963James Dong BCC next_y_hor 2190c1bc742181ded4930842b46e9507372f0b1b963James Dong 2200c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_3_1, x_3_1, ROR #8 2210c1bc742181ded4930842b46e9507372f0b1b963James Dong 2220c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmp4, x_6_4, mult_20_01, plus16 2230c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmp6, x_6_4, mult_20_01, plus16 2240c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmp5, x_6_4, mult_20_m5, plus16 2250c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmpa, x_7_5, mult_20_01, plus16 2260c1bc742181ded4930842b46e9507372f0b1b963James Dong 2270c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmp4, x_7_5, mult_20_m5, tmp4 2280c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLATB tmp6, x_7_5, mult_20_m5, tmp6 2290c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmp5, x_7_5, mult_20_01, tmp5 2300c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR x_7_5, [ref], #4 2310c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLAD tmpa, x_2_0, mult_20_m5, tmpa 2320c1bc742181ded4930842b46e9507372f0b1b963James Dong 2330c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmp4, x_2_0, mult_20_m5, tmp4 2340c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmp6, x_2_0, mult_20_m5, tmp6 2350c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmp5, x_2_0, mult_20_01, tmp5 
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmpa, x_3_1, mult_20_m5, tmpa 2370c1bc742181ded4930842b46e9507372f0b1b963James Dong 2380c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmp4, x_3_1, mult_20_01, tmp4 2390c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 x_6_4, x_7_5 2400c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmp5, x_3_1, mult_20_m5, tmp5 2410c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLADX tmp6, x_3_1, mult_20_01, tmp6 2420c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB tmpa, x_6_4, mult_20_01, tmpa 2430c1bc742181ded4930842b46e9507372f0b1b963James Dong 2440c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tmp5, tmp5, ASR #5 2450c1bc742181ded4930842b46e9507372f0b1b963James Dong MOV tmp4, tmp4, ASR #5 2460c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT tmp5, tmp5, tmpa, LSL #(16-5) 2470c1bc742181ded4930842b46e9507372f0b1b963James Dong PKHBT tmp4, tmp4, tmp6, LSL #(16-5) 2480c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmp5, #8, tmp5 2490c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmp4, #8, tmp4 2500c1bc742181ded4930842b46e9507372f0b1b963James Dong 2510c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS count, count, #4<<28 2520c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR tmp4, tmp4, tmp5, LSL #8 2530c1bc742181ded4930842b46e9507372f0b1b963James Dong STR tmp4, [mb], #4 2540c1bc742181ded4930842b46e9507372f0b1b963James Dong BCS loop_x_hor 2550c1bc742181ded4930842b46e9507372f0b1b963James Dong 2560c1bc742181ded4930842b46e9507372f0b1b963James Dongnext_y_hor 2570c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmp6, count, #0x00F00000 ;// partWidth-1 2580c1bc742181ded4930842b46e9507372f0b1b963James Dong SMLABB ref, count, mult_20_01, ref ;// +width 2590c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDS mb, mb, #16 ;// +16, Carry=0 2600c1bc742181ded4930842b46e9507372f0b1b963James Dong SBC mb, mb, tmp6, LSR #20 ;// -(partWidth-1)-1 
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong SBC ref, ref, tmp6, LSR #20 ;// -(partWidth-1)-1 2620c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDS count, count, #(1<<28)-(1<<24) ;// decrement counter (partW) 2630c1bc742181ded4930842b46e9507372f0b1b963James Dong BGE loop_y_hor 2640c1bc742181ded4930842b46e9507372f0b1b963James Dong 2650c1bc742181ded4930842b46e9507372f0b1b963James Dong 2660c1bc742181ded4930842b46e9507372f0b1b963James Dong 2670c1bc742181ded4930842b46e9507372f0b1b963James Dong;// VERTICAL PART 2680c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2690c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Approach to vertical interpolation 2700c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2710c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Interpolation is done by using 32-bit loads and stores 2720c1bc742181ded4930842b46e9507372f0b1b963James Dong;// and by using 16 bit arithmetic. 4x4 block is processed 2730c1bc742181ded4930842b46e9507372f0b1b963James Dong;// in each round. 2740c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2750c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n| 2760c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n| 2770c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n| 2780c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n| 2790c1bc742181ded4930842b46e9507372f0b1b963James Dong;// .. 2800c1bc742181ded4930842b46e9507372f0b1b963James Dong;// .. 2810c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |a_m1|a_m1|a_m1|a_m1|... 2820c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |b_m1|b_m1|b_m1|b_m1|... 2830c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |c_m1|c_m1|c_m1|c_m1|... 2840c1bc742181ded4930842b46e9507372f0b1b963James Dong;// |d_m1|d_m1|d_m1|d_m1|... 
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong 2860c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Approach to bilinear interpolation to quarter pel position. 2870c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 4 bytes are processed parallel 2880c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2890c1bc742181ded4930842b46e9507372f0b1b963James Dong;// algorithm (a+b+1)/2. Rouding upwards +1 can be achieved by 2900c1bc742181ded4930842b46e9507372f0b1b963James Dong;// negating second operand to get one's complement (instead of 2's) 2910c1bc742181ded4930842b46e9507372f0b1b963James Dong;// and using subtraction, EOR is used to correct sign. 2920c1bc742181ded4930842b46e9507372f0b1b963James Dong;// 2930c1bc742181ded4930842b46e9507372f0b1b963James Dong;// MVN b, b 2940c1bc742181ded4930842b46e9507372f0b1b963James Dong;// UHSUB8 a, a, b 2950c1bc742181ded4930842b46e9507372f0b1b963James Dong;// EOR a, a, 0x80808080 2960c1bc742181ded4930842b46e9507372f0b1b963James Dong 2970c1bc742181ded4930842b46e9507372f0b1b963James Dong 2980c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR ref, [sp, #0x1e4] ;// ref 2990c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, [sp, #0x228] ;// horVerOffset 3000c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR mb, [sp, #0x1e8] ;// mb 3010c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR width, [sp, #0x218] ;// width 3020c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref, ref, #2 ;// calculate correct position 3030c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmpa, tmpa, #1 3040c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref, ref, tmpa 3050c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR plus16, = 0x00100010 ;// +16 to lower and upperf halfwords 3060c1bc742181ded4930842b46e9507372f0b1b963James Dong AND count, count, #0x00FFFFFF ;// partWidth-1 3070c1bc742181ded4930842b46e9507372f0b1b963James Dong 3080c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmpa, count, #0x000F0000 ;// 
partHeight-1 3090c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, tmpa, LSL #8 3100c1bc742181ded4930842b46e9507372f0b1b963James Dong 3110c1bc742181ded4930842b46e9507372f0b1b963James Dongloop_y 3120c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD count, count, tmp6, LSL #8 ;// partWidth-1 3130c1bc742181ded4930842b46e9507372f0b1b963James Dong 3140c1bc742181ded4930842b46e9507372f0b1b963James Dongloop_x 3150c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp1, [ref], width ;// |a4|a3|a2|a1| 3160c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp2, [ref], width ;// |c4|c3|c2|c1| 3170c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp3, [ref], width ;// |g4|g3|g2|g1| 3180c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp4, [ref], width ;// |m4|m3|m2|m1| 3190c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp5, [ref], width ;// |r4|r3|r2|r1| 3200c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp6, [ref], width ;// |t4|t3|t2|t1| 3210c1bc742181ded4930842b46e9507372f0b1b963James Dong 3220c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// first four pixels 3230c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp3 ;// |g3|g1| 3240c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp4 ;// |g3+m3|g1+m1| 3250c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp2 ;// |c3|c1| 3260c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 3270c1bc742181ded4930842b46e9507372f0b1b963James Dong 3280c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1| 3290c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 3300c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A 3310c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T 3320c1bc742181ded4930842b46e9507372f0b1b963James Dong 
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 3340c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 3350c1bc742181ded4930842b46e9507372f0b1b963James Dong 3360c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 3370c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR res, = 0x00FF00FF 3380c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp3, ROR #8 ;// |g4|g2| 3390c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// |g4+m4|g2+m2| 3400c1bc742181ded4930842b46e9507372f0b1b963James Dong AND res, res, tmpb, LSR #5 ;// mask and divide by 32 3410c1bc742181ded4930842b46e9507372f0b1b963James Dong 3420c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 3430c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp2, ROR #8 ;// |c4|c2| 3440c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 3450c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp5, ROR #8 ;// |c4+r4|c2+r2| 3460c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A 3470c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 16+20(G+M)+A+T 3480c1bc742181ded4930842b46e9507372f0b1b963James Dong 3490c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 3500c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 3510c1bc742181ded4930842b46e9507372f0b1b963James Dong 3520c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 3530c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp1, [mb] 3540c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0xFF00FF00 3550c1bc742181ded4930842b46e9507372f0b1b963James Dong MVN tmp1, tmp1 
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divede by 32 3570c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR res, res, tmpa 3580c1bc742181ded4930842b46e9507372f0b1b963James Dong 3590c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0x80808080 3600c1bc742181ded4930842b46e9507372f0b1b963James Dong UHSUB8 res, res, tmp1 ;// bilinear interpolation 3610c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp1, [ref], width ;// load next row 3620c1bc742181ded4930842b46e9507372f0b1b963James Dong EOR res, res, tmpa ;// correct sign 3630c1bc742181ded4930842b46e9507372f0b1b963James Dong 3640c1bc742181ded4930842b46e9507372f0b1b963James Dong STR res, [mb], #16 ;// next row (mb) 3650c1bc742181ded4930842b46e9507372f0b1b963James Dong 3660c1bc742181ded4930842b46e9507372f0b1b963James Dong 3670c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp2 = |a4|a3|a2|a1| 3680c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp3 = |c4|c3|c2|c1| 3690c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp4 = |g4|g3|g2|g1| 3700c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp5 = |m4|m3|m2|m1| 3710c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp6 = |r4|r3|r2|r1| 3720c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp1 = |t4|t3|t2|t1| 3730c1bc742181ded4930842b46e9507372f0b1b963James Dong 3740c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// second four pixels 3750c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp4 ;// |g3|g1| 3760c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1| 3770c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp3 ;// |c3|c1| 3780c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 3790c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1| 3800c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 
;// 16+20(G+M) 3810c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A 3820c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T 3830c1bc742181ded4930842b46e9507372f0b1b963James Dong 3840c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 3850c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 3860c1bc742181ded4930842b46e9507372f0b1b963James Dong 3870c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 3880c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR res, = 0x00FF00FF 3890c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2| 3900c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2| 3910c1bc742181ded4930842b46e9507372f0b1b963James Dong AND res, res, tmpb, LSR #5 ;// mask and divide by 32 3920c1bc742181ded4930842b46e9507372f0b1b963James Dong 3930c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 3940c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2| 3950c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 3960c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2| 3970c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A 3980c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T 3990c1bc742181ded4930842b46e9507372f0b1b963James Dong 4000c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 4010c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 4020c1bc742181ded4930842b46e9507372f0b1b963James Dong 4030c1bc742181ded4930842b46e9507372f0b1b963James 
Dong USAT16 tmpb, #13, tmpa ;// saturate 4040c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp2, [mb] 4050c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0xFF00FF00 4060c1bc742181ded4930842b46e9507372f0b1b963James Dong MVN tmp2, tmp2 4070c1bc742181ded4930842b46e9507372f0b1b963James Dong 4080c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 4090c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR res, res, tmpa 4100c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0x80808080 4110c1bc742181ded4930842b46e9507372f0b1b963James Dong UHSUB8 res, res, tmp2 ;// bilinear interpolation 4120c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp2, [ref], width ;// load next row 4130c1bc742181ded4930842b46e9507372f0b1b963James Dong EOR res, res, tmpa ;// correct sign 4140c1bc742181ded4930842b46e9507372f0b1b963James Dong STR res, [mb], #16 ;// next row 4150c1bc742181ded4930842b46e9507372f0b1b963James Dong 4160c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp3 = |a4|a3|a2|a1| 4170c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp4 = |c4|c3|c2|c1| 4180c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp5 = |g4|g3|g2|g1| 4190c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp6 = |m4|m3|m2|m1| 4200c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp1 = |r4|r3|r2|r1| 4210c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp2 = |t4|t3|t2|t1| 4220c1bc742181ded4930842b46e9507372f0b1b963James Dong 4230c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// third four pixels 4240c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp5 ;// |g3|g1| 4250c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1| 4260c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp4 ;// |c3|c1| 4270c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 
4280c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1| 4290c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 4300c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A 4310c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T 4320c1bc742181ded4930842b46e9507372f0b1b963James Dong 4330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 4340c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 4350c1bc742181ded4930842b46e9507372f0b1b963James Dong 4360c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 4370c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR res, = 0x00FF00FF 4380c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2| 4390c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// |g4+m4|g2+m2| 4400c1bc742181ded4930842b46e9507372f0b1b963James Dong AND res, res, tmpb, LSR #5 ;// mask and divide by 32 4410c1bc742181ded4930842b46e9507372f0b1b963James Dong 4420c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 4430c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2| 4440c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 4450c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2| 4460c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A 4470c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T 4480c1bc742181ded4930842b46e9507372f0b1b963James Dong 4490c1bc742181ded4930842b46e9507372f0b1b963James Dong 4500c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, 
tmpb, tmpb, LSL #2 ;// 5(C+R) 4510c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 4520c1bc742181ded4930842b46e9507372f0b1b963James Dong 4530c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 4540c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp3, [mb] 4550c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0xFF00FF00 4560c1bc742181ded4930842b46e9507372f0b1b963James Dong MVN tmp3, tmp3 4570c1bc742181ded4930842b46e9507372f0b1b963James Dong 4580c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 4590c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR res, res, tmpa 4600c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0x80808080 4610c1bc742181ded4930842b46e9507372f0b1b963James Dong UHSUB8 res, res, tmp3 ;// bilinear interpolation 4620c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp3, [ref] ;// load next row 4630c1bc742181ded4930842b46e9507372f0b1b963James Dong EOR res, res, tmpa ;// correct sign 4640c1bc742181ded4930842b46e9507372f0b1b963James Dong STR res, [mb], #16 ;// next row 4650c1bc742181ded4930842b46e9507372f0b1b963James Dong 4660c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp4 = |a4|a3|a2|a1| 4670c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp5 = |c4|c3|c2|c1| 4680c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp6 = |g4|g3|g2|g1| 4690c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp1 = |m4|m3|m2|m1| 4700c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp2 = |r4|r3|r2|r1| 4710c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// tmp3 = |t4|t3|t2|t1| 4720c1bc742181ded4930842b46e9507372f0b1b963James Dong 4730c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// fourth four pixels 4740c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp6 ;// |g3|g1| 4750c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp1 
;// |g3+m3|g1+m1| 4760c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp5 ;// |c3|c1| 4770c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 4780c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1| 4790c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 4800c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A 4810c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T 4820c1bc742181ded4930842b46e9507372f0b1b963James Dong 4830c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 4840c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 4850c1bc742181ded4930842b46e9507372f0b1b963James Dong 4860c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 4870c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR res, = 0x00FF00FF 4880c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2| 4890c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2| 4900c1bc742181ded4930842b46e9507372f0b1b963James Dong AND res, res, tmpb, LSR #5 ;// mask and divide by 32 4910c1bc742181ded4930842b46e9507372f0b1b963James Dong 4920c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M) 4930c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2| 4940c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M) 4950c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2| 4960c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A 4970c1bc742181ded4930842b46e9507372f0b1b963James Dong UXTAB16 tmpa, tmpa, tmp3, ROR 
#8 ;// 16+20(G+M)+A+T 4980c1bc742181ded4930842b46e9507372f0b1b963James Dong 4990c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R) 5000c1bc742181ded4930842b46e9507372f0b1b963James Dong SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R) 5010c1bc742181ded4930842b46e9507372f0b1b963James Dong 5020c1bc742181ded4930842b46e9507372f0b1b963James Dong USAT16 tmpb, #13, tmpa ;// saturate 5030c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp5, [mb] 5040c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmp4, = 0xFF00FF00 5050c1bc742181ded4930842b46e9507372f0b1b963James Dong MVN tmp5, tmp5 5060c1bc742181ded4930842b46e9507372f0b1b963James Dong 5070c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32 5080c1bc742181ded4930842b46e9507372f0b1b963James Dong ORR res, res, tmpa 5090c1bc742181ded4930842b46e9507372f0b1b963James Dong LDR tmpa, = 0x80808080 5100c1bc742181ded4930842b46e9507372f0b1b963James Dong UHSUB8 res, res, tmp5 ;// bilinear interpolation 5110c1bc742181ded4930842b46e9507372f0b1b963James Dong 5120c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// decrement loop_x counter 5130c1bc742181ded4930842b46e9507372f0b1b963James Dong SUBS count, count, #4<<28 ;// decrement x loop counter 5140c1bc742181ded4930842b46e9507372f0b1b963James Dong 5150c1bc742181ded4930842b46e9507372f0b1b963James Dong ;// calculate "ref" address for next round 5160c1bc742181ded4930842b46e9507372f0b1b963James Dong SUB ref, ref, width, LSL #3 ;// ref -= 8*width; 5170c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref, ref, #4 ;// next column (4 pixels) 5180c1bc742181ded4930842b46e9507372f0b1b963James Dong 5190c1bc742181ded4930842b46e9507372f0b1b963James Dong EOR res, res, tmpa ;// correct sign 5200c1bc742181ded4930842b46e9507372f0b1b963James Dong STR res, [mb], #-44 5210c1bc742181ded4930842b46e9507372f0b1b963James Dong 5220c1bc742181ded4930842b46e9507372f0b1b963James Dong BCS loop_x 
5230c1bc742181ded4930842b46e9507372f0b1b963James Dong 5240c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDS mb, mb, #64 ;// set Carry=0 5250c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD ref, ref, width, LSL #2 ;// ref += 4*width 5260c1bc742181ded4930842b46e9507372f0b1b963James Dong AND tmp6, count, #0x00F00000 ;// partWidth-1 5270c1bc742181ded4930842b46e9507372f0b1b963James Dong SBC ref, ref, tmp6, LSR #20 ;// -(partWidth-1)-1 5280c1bc742181ded4930842b46e9507372f0b1b963James Dong SBC mb, mb, tmp6, LSR #20 ;// -(partWidth-1)-1 5290c1bc742181ded4930842b46e9507372f0b1b963James Dong 5300c1bc742181ded4930842b46e9507372f0b1b963James Dong ADDS count, count, #0xC << 24 ;// decrement y loop counter 5310c1bc742181ded4930842b46e9507372f0b1b963James Dong BGE loop_y 5320c1bc742181ded4930842b46e9507372f0b1b963James Dong 5330c1bc742181ded4930842b46e9507372f0b1b963James Dong ADD sp, sp, #0x1f4 5340c1bc742181ded4930842b46e9507372f0b1b963James Dong LDMFD sp!, {r4-r11, pc} 5350c1bc742181ded4930842b46e9507372f0b1b963James Dong 5360c1bc742181ded4930842b46e9507372f0b1b963James Dong END 537