; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorHalf function
;--
;-------------------------------------------------------------------------------


    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE

;// h264bsdInterpolateHorHalf register allocation
;//
;// Most registers carry two aliases. The first name of each pair is the
;// role the register has during argument checking / setup, the second name
;// is the role it has once the filter loops are running. The two roles of
;// a register are never live at the same time.

ref         RN 0    ;// source pointer

mb          RN 1    ;// output pointer (filter loops)
buff        RN 1    ;// fill buffer pointer passed to h264bsdFillBlock

count       RN 2    ;// packed loop counters + width (filter loops)
x0          RN 2    ;// block x coordinate (setup)

y0          RN 3    ;// block y coordinate (setup)
x_2_0       RN 3    ;// pixels [2 0] of the current group as 16-bit halves

width       RN 4    ;// source row length (setup)
x_3_1       RN 4    ;// pixels [3 1]

height      RN 5    ;// source height (setup)
x_6_4       RN 5    ;// pixels [6 4]

partW       RN 6    ;// block width (setup)
x_7_5       RN 6    ;// pixels [7 5]

partH       RN 7    ;// block height (setup)
tmp1        RN 7    ;// accumulator

tmp2        RN 8    ;// accumulator

tmp3        RN 9    ;// accumulator

tmp4        RN 10   ;// accumulator

mult_20_01  RN 11   ;// coefficient pair: top = 20, bottom = 1
mult_20_m5  RN 12   ;// coefficient pair: top = 20, bottom = -5

plus16      RN 14   ;// rounding constant; r14 is lr, which the function
                    ;// pushes on entry before reusing it as data


;// function exports and imports

    IMPORT  h264bsdFillBlock

    EXPORT  h264bsdInterpolateHorHalf

;// Horizontal filter approach
;//
;// Basic idea in horizontal filtering is to adjust coefficients
;// like below. Calculation is done with 16-bit maths.
;//
;// Each register holds two pixels as 16-bit halves, written [ top bottom ].
;// A coefficient printed under the left/right column of a register is the
;// one applied to the top/bottom half of that register.
;//
;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
;// y_0 =   20  1     20 -5        -5         1
;// y_1 =   -5        20  1      1 20        -5
;// y_2 =    1        -5        -5 20      1 20
;// y_3 =              1        20 -5     -5 20         1

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateHorHalf
;//
;// Writes a partWidth x partHeight block of horizontally filtered samples.
;// Every output sample is the 6-tap sum (1, -5, 20, 20, -5, 1) of six
;// neighbouring source bytes, plus 16, shifted right by 5 and saturated to
;// 0..255. Four output samples are stored per word.
;//
;// In:    r0 = ref          source pointer
;//        r1 = mb           output pointer; consecutive output rows are
;//                          16 bytes apart
;//        r2 = x0, r3 = y0  block position (compared as signed against 0)
;//        [sp, #0x0]  = width
;//        [sp, #0x4]  = height
;//        [sp, #0x8]  = partWidth
;//        [sp, #0xc]  = partHeight
;// Out:   none (r0-r3 are not restored)
;// Saved: r4-r11 are restored on exit; lr is pushed and popped into pc
;// Calls: h264bsdFillBlock when the block (widened by 5 columns) does not
;//        lie completely inside the width x height area
;//
;// NOTE(review): the loop counters are 4-bit fields and the inner loop steps
;// four samples at a time, so this presumably expects partWidth and
;// partHeight in 1..16 with partWidth a multiple of 4 - confirm with callers.
;//-----------------------------------------------------------------------------

;// Stack frame after the prologue (byte offsets from sp)
;//
;//   0x000..0x013  five outgoing stack arguments of h264bsdFillBlock
;//   0x014..0x027  not referenced by this function
;//   0x028..0x1e3  p1, fill buffer (0x1bc bytes = 21*21/4+1 words)
;//   0x1e4..0x1f3  saved r0-r3 (ref, mb, x0, y0)
;//   0x1f4..0x217  saved r4-r11, lr
;//   0x218..       caller's stack arguments

LOCALS_SIZE     EQU     0x1e4                   ;// bytes reserved below saved regs

FILL_ARG_WIDTH  EQU     0x00                    ;// outgoing: width
                                                ;// 0x04 and 0x08 are written by STMIB
FILL_ARG_BLK_H  EQU     0x0c                    ;// outgoing: partHeight
FILL_ARG_LAST   EQU     0x10                    ;// outgoing: partWidth+5
FILL_BUF        EQU     0x28                    ;// p1

SAVED_MB        EQU     LOCALS_SIZE + 0x04      ;// 0x1e8, pushed r1
SAVED_X0        EQU     LOCALS_SIZE + 0x08      ;// 0x1ec, pushed r2
SAVED_Y0        EQU     LOCALS_SIZE + 0x0c      ;// 0x1f0, pushed r3
SAVED_R4        EQU     LOCALS_SIZE + 0x10      ;// 0x1f4, first register popped

ARG_WIDTH       EQU     LOCALS_SIZE + 0x34      ;// 0x218 (13 pushed registers)
ARG_HEIGHT      EQU     LOCALS_SIZE + 0x38      ;// 0x21c
ARG_PARTW       EQU     LOCALS_SIZE + 0x3c      ;// 0x220
ARG_PARTH       EQU     LOCALS_SIZE + 0x40      ;// 0x224

h264bsdInterpolateHorHalf
    STMFD   sp!, {r0-r11, lr}
    SUB     sp, sp, #LOCALS_SIZE

    ;// Decide whether the source block has to be fetched through
    ;// h264bsdFillBlock: x0 < 0, x0+partWidth+5 > width, y0 < 0 or
    ;// y0+partHeight > height.
    CMP     x0, #0
    BLT     do_fill                     ;// (x0 < 0)
    LDR     partW, [sp, #ARG_PARTW]     ;// partWidth
    ADD     tmp4, x0, partW             ;// (x0+partWidth)
    ADD     tmp4, tmp4, #5              ;// (x0+partWidth+5)
    LDR     width, [sp, #ARG_WIDTH]     ;// width
    CMP     tmp4, width
    BHI     do_fill                     ;// (x0+partWidth+5) > width

    CMP     y0, #0
    BLT     do_fill                     ;// (y0 < 0)
    LDR     partH, [sp, #ARG_PARTH]     ;// partHeight
    ADD     tmp2, y0, partH             ;// (y0+partHeight)
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    CMP     tmp2, height
    BLS     skip_fill                   ;// no overfill needed


do_fill
    ;// Reached from any of the checks above, so nothing loaded so far can be
    ;// relied on: reload all stack arguments. r0 (ref), r2 (x0) and r3 (y0)
    ;// still hold their entry values and are passed on unchanged.
    LDR     partH, [sp, #ARG_PARTH]     ;// partHeight
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    LDR     partW, [sp, #ARG_PARTW]     ;// partWidth
    ADD     tmp4, partW, #5             ;// tmp4 = partWidth + 5
    STMIB   sp, {height, tmp4}          ;// sp+4 = height, sp+8 = partWidth+5
    STR     partH, [sp, #FILL_ARG_BLK_H] ;// sp+c = partHeight
    STR     tmp4, [sp, #FILL_ARG_LAST]  ;// sp+10 = partWidth+5
    LDR     width, [sp, #ARG_WIDTH]     ;// width
    STR     width, [sp, #FILL_ARG_WIDTH] ;// sp+0 = width
    ADD     buff, sp, #FILL_BUF         ;// buff = p1[21*21/4+1]
    BL      h264bsdFillBlock

    ;// From here on the source is the fill buffer: origin (0,0) and a row
    ;// length of partWidth+5. tmp4, partW and partH are callee-saved
    ;// registers and therefore still valid after the call.
    MOV     x0, #0
    STR     x0, [sp, #SAVED_X0]         ;// x0 = 0
    STR     x0, [sp, #SAVED_Y0]         ;// y0 = 0
    ADD     ref, sp, #FILL_BUF          ;// ref = p1
    STR     tmp4, [sp, #ARG_WIDTH]      ;// width = partWidth+5


skip_fill
    LDR     x0, [sp, #SAVED_X0]         ;// x0
    LDR     y0, [sp, #SAVED_Y0]         ;// y0
    LDR     width, [sp, #ARG_WIDTH]     ;// width
    MLA     tmp2, width, y0, x0         ;// y0*width+x0
    ADD     ref, ref, tmp2              ;// ref += y0*width+x0
    ADD     ref, ref, #8                ;// ref = ref+8 (loop_y reads at -8, -4)
    LDR     mb, [sp, #SAVED_MB]         ;// mb

    ;// pack values to count register
    ;// [31:28] loop_x (partWidth-1)
    ;// [27:24] loop_y (partHeight-1)
    ;// [23:20] partWidth-1
    ;// [19:16] partHeight-1
    ;// [15:00] width
    MOV     count, width
    SUB     partW, partW, #1
    SUB     partH, partH, #1
    ADD     tmp2, partH, partW, LSL #4
    ADD     count, count, tmp2, LSL #16


    LDR     mult_20_01, = 0x00140001    ;// top = 20, bottom = 1
    LDR     mult_20_m5, = 0x0014FFFB    ;// top = 20, bottom = -5
    MOV     plus16, #16                 ;// rounding term, seeds every sum
    AND     tmp1, count, #0x000F0000    ;// partHeight-1
    AND     tmp3, count, #0x00F00000    ;// partWidth-1
    ADD     count, count, tmp1, LSL #8  ;// loop_y = partHeight-1
loop_y
    ;// Start of a row: load the first eight source bytes and split them
    ;// into the even/odd pixel pairs shown in the table above.
    LDR     x_3_1, [ref, #-8]
    ADD     count, count, tmp3, LSL #8  ;// loop_x = partWidth-1
    LDR     x_7_5, [ref, #-4]
    UXTB16  x_2_0, x_3_1
    UXTB16  x_3_1, x_3_1, ROR #8
    UXTB16  x_6_4, x_7_5

loop_x
    ;// First half: four outputs from x_2_0, x_3_1, x_6_4, x_7_5 and the
    ;// next word, which is loaded into x_3_1 / x_2_0 on the way.
    ;// tmp1 = y_0, tmp2 = y_1, tmp3 = y_2, tmp4 = y_3.
    UXTB16  x_7_5, x_7_5, ROR #8

    SMLAD   tmp1, x_2_0, mult_20_01, plus16
    SMLATB  tmp3, x_2_0, mult_20_01, plus16
    SMLATB  tmp2, x_2_0, mult_20_m5, plus16
    SMLATB  tmp4, x_3_1, mult_20_01, plus16

    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1
    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3
    SMLAD   tmp2, x_3_1, mult_20_01, tmp2
    LDR     x_3_1, [ref], #4            ;// next four source bytes
    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4

    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1
    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3
    SMLADX  tmp2, x_6_4, mult_20_01, tmp2
    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4

    SMLABB  tmp1, x_7_5, mult_20_01, tmp1
    UXTB16  x_2_0, x_3_1
    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2
    SMLADX  tmp3, x_7_5, mult_20_01, tmp3
    SMLABB  tmp4, x_2_0, mult_20_01, tmp4

    ;// >>5, pair up as [y_3 y_1] and [y_2 y_0], clamp each half to 0..255
    MOV     tmp2, tmp2, ASR #5
    MOV     tmp1, tmp1, ASR #5
    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)
    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)
    USAT16  tmp2, #8, tmp2
    USAT16  tmp1, #8, tmp1

    SUBS    count, count, #4<<28        ;// loop_x -= 4, borrow clears carry
    ORR     tmp1, tmp1, tmp2, LSL #8    ;// interleave into four bytes
    STR     tmp1, [mb], #4
    BCC     next_y                      ;// row finished

    ;// Second half: same computation with the register roles rotated
    ;// (x_6_4, x_7_5, x_2_0, x_3_1); the next word goes to x_7_5 / x_6_4.
    UXTB16  x_3_1, x_3_1, ROR #8

    SMLAD   tmp1, x_6_4, mult_20_01, plus16
    SMLATB  tmp3, x_6_4, mult_20_01, plus16
    SMLATB  tmp2, x_6_4, mult_20_m5, plus16
    SMLATB  tmp4, x_7_5, mult_20_01, plus16

    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1
    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3
    SMLAD   tmp2, x_7_5, mult_20_01, tmp2
    LDR     x_7_5, [ref], #4            ;// next four source bytes
    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4

    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1
    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3
    SMLADX  tmp2, x_2_0, mult_20_01, tmp2
    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4

    SMLABB  tmp1, x_3_1, mult_20_01, tmp1
    UXTB16  x_6_4, x_7_5
    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2
    SMLADX  tmp3, x_3_1, mult_20_01, tmp3
    SMLABB  tmp4, x_6_4, mult_20_01, tmp4

    MOV     tmp2, tmp2, ASR #5
    MOV     tmp1, tmp1, ASR #5
    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)
    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)
    USAT16  tmp2, #8, tmp2
    USAT16  tmp1, #8, tmp1

    SUBS    count, count, #4<<28        ;// loop_x -= 4, borrow clears carry
    ORR     tmp1, tmp1, tmp2, LSL #8
    STR     tmp1, [mb], #4
    BCS     loop_x

next_y
    ;// Advance both pointers to the start of the next row:
    ;// ref += width - partWidth, mb += 16 - partWidth.
    AND     tmp3, count, #0x00F00000    ;// partWidth-1
    SMLABB  ref, count, mult_20_01, ref ;// +width (count[15:0] * 1)
    ADDS    mb, mb, #16                 ;// +16, Carry=0
    SBC     mb, mb, tmp3, LSR #20       ;// -(partWidth-1)-1
    SBC     ref, ref, tmp3, LSR #20     ;// -(partWidth-1)-1
    ADDS    count, count, #(1<<28)-(1<<24) ;// loop_y -= 1, loop_x wraps to 0
    BGE     loop_y                      ;// result goes negative after last row

    ADD     sp, sp, #SAVED_R4           ;// drop locals and the saved r0-r3
    LDMFD   sp!, {r4-r11, pc}

    END
