; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHorVer
;--            function
;--
;-------------------------------------------------------------------------------


    IF :DEF: H264DEC_WINASM
        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE


;// h264bsdInterpolateChromaHorVer register allocation
;//
;// Several names alias the same physical register; the aliases are never
;// live at the same time:
;//   r0  ref    (incoming plane pointer)     / ptrA  (running source pointer)
;//   r1  block  (fill destination)           / mb    (running output pointer)
;//   r2  x0                                  / count (packed loop counters)
;//   r3  y0                                  / valY  (|yFrac|8-yFrac| halfwords)
;//   r5  height                              / tmp4
;//   r10 chrPW                               / tmp5
;//   r11 chrPH                               / tmp6
;//   r14 yFrac  (until packed into valY)     / c32   (rounding constant 32)

ref     RN 0
ptrA    RN 0

mb      RN 1
block   RN 1

x0      RN 2
count   RN 2

y0      RN 3
valY    RN 3

width   RN 4

tmp4    RN 5
height  RN 5

tmp1    RN 6

tmp2    RN 7

tmp3    RN 8

valX    RN 9

tmp5    RN 10
chrPW   RN 10

tmp6    RN 11
chrPH   RN 11

xFrac   RN 12

c32     RN 14
yFrac   RN 14

;// Stack frame layout: byte offsets from sp once the prologue has run.
;//   [sp, #0x00..0x13]   five outgoing stack arguments for h264bsdFillBlock
;//   [sp, #FILL_BUF..]   scratch buffer the fill path writes both planes into
;//   [sp, #ARG_REF..]    r0-r3 as pushed by the prologue (ref, pred, x0, y0)
;//   [sp, #0xd4..0xf7]   saved r4-r11 and lr
;//   [sp, #ARG_WIDTH..]  the caller's stack arguments

FILL_BUF        EQU 0x1c
LOCALS_SIZE     EQU 0xc4
ARG_REF         EQU 0xc4    ;// u8 *ref
ARG_PRED        EQU 0xc8    ;// u8 *predPartChroma
ARG_X0          EQU 0xcc    ;// i32 x0
ARG_Y0          EQU 0xd0    ;// i32 y0
ARG_WIDTH       EQU 0xf8    ;// u32 width
ARG_HEIGHT      EQU 0xfc    ;// u32 height
ARG_XFRAC       EQU 0x100   ;// u32 xFrac
ARG_YFRAC       EQU 0x104   ;// u32 yFrac
ARG_CHR_PW      EQU 0x108   ;// u32 chromaPartWidth
ARG_CHR_PH      EQU 0x10c   ;// u32 chromaPartHeight

;// function exports and imports

    IMPORT  h264bsdFillBlock

    EXPORT  h264bsdInterpolateChromaHorVer

;-------------------------------------------------------------------------------
;// h264bsdInterpolateChromaHorVer
;//
;// Bilinear (horizontal + vertical) interpolation of one chroma partition,
;// first for the Cb plane and then for the Cr plane.
;//
;// Function arguments (stack offsets are those valid after the prologue):
;//
;//  u8 *ref,                : r0, saved at 0xc4
;//  u8 *predPartChroma,     : r1, saved at 0xc8
;//  i32 x0,                 : r2, saved at 0xcc
;//  i32 y0,                 : r3, saved at 0xd0
;//  u32 width,              : 0xf8
;//  u32 height,             : 0xfc
;//  u32 xFrac,              : 0x100
;//  u32 yFrac,              : 0x104
;//  u32 chromaPartWidth,    : 0x108
;//  u32 chromaPartHeight    : 0x10c
;//
;// Each output pixel is
;//     ((8-xFrac)*((8-yFrac)*A + yFrac*C)
;//       +  xFrac *((8-yFrac)*B + yFrac*D) + 32) >> 6
;// where A,B are horizontally adjacent source pixels and C,D are the pixels
;// one source row below them.
;//
;// Source layout : the second (Cr) plane is read starting 'height' rows
;//                 after the first (Cb) plane, using the same 'width' stride.
;// Output layout : rows are written 8 bytes apart; the Cr result starts
;//                 64 bytes after predPartChroma.
;//
;// If the (chromaPartWidth+1) x (chromaPartHeight+1) source window does not
;// lie inside the plane, both planes are first copied into the stack buffer
;// with h264bsdFillBlock and interpolation reads from that copy.
;//
;// Registers : r4-r11 are saved and restored; r0-r3, r12 and the flags are
;//             clobbered. r0 holds a scratch pointer on exit, so presumably
;//             the C prototype returns void - confirm against the header.
;// Stack     : 13 pushed words + 0xc4 bytes of locals (0xf8 bytes total).
;// NOTE(review): the loops handle 2x2 pixels per iteration and keep
;//             (size-1) in 4-bit fields, and the stack buffer is 0xa8 bytes;
;//             this looks like it assumes chromaPartWidth/Height are even
;//             and at most 8 - confirm against the callers.
;-------------------------------------------------------------------------------

h264bsdInterpolateChromaHorVer
    STMFD   sp!, {r0-r11,lr}
    SUB     sp, sp, #LOCALS_SIZE

    ;// xFrac is not loaded here: it is first needed after skip_fill, where
    ;// it is (re)loaded because r12 does not survive the BL calls.
    LDR     chrPW, [sp, #ARG_CHR_PW]    ;// chromaPartWidth
    LDR     width, [sp, #ARG_WIDTH]     ;// width
    CMP     x0, #0
    BLT     do_fill                     ;// window starts left of the plane

    ADD     tmp1, x0, chrPW             ;// tmp1 = x0+ chromaPartWidth
    ADD     tmp1, tmp1, #1              ;// tmp1 = x0+ chromaPartWidth+1
    CMP     tmp1, width                 ;// x0+chromaPartWidth+1 > width
    BHI     do_fill                     ;// unsigned compare: x0 >= 0 here

    CMP     y0, #0
    BLT     do_fill                     ;// window starts above the plane
    LDR     chrPH, [sp, #ARG_CHR_PH]    ;// chromaPartHeight
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    ADD     tmp1, y0, chrPH             ;// tmp1 = y0 + chromaPartHeight
    ADD     tmp1, tmp1, #1              ;// tmp1 = y0 + chromaPartHeight + 1
    CMP     tmp1, height
    BLS     skip_fill                   ;// window fully inside: read in place

do_fill
    ;// Reached from several branches, some before chrPH/height were loaded,
    ;// so load them (again) here.
    LDR     chrPH, [sp, #ARG_CHR_PH]    ;// chromaPartHeight
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    ADD     tmp3, chrPW, #1             ;// tmp3 = chromaPartWidth+1
    ADD     tmp1, chrPW, #1             ;// tmp1 = chromaPartWidth+1
    ADD     tmp2, chrPH, #1             ;// tmp2 = chromaPartHeight+1

    ;// First plane: r0 = ref, r2 = x0, r3 = y0 are still the incoming
    ;// values; the remaining five arguments go on the stack.
    STMIA   sp,{width,height,tmp1,tmp2,tmp3}
    ADD     block, sp, #FILL_BUF        ;// block
    BL      h264bsdFillBlock

    ;// Second plane: r0-r3 were call-clobbered, reload them and rebuild the
    ;// outgoing stack arguments.
    LDR     x0, [sp, #ARG_X0]
    LDR     y0, [sp, #ARG_Y0]
    LDR     ref, [sp, #ARG_REF]         ;// ref
    STMIA   sp,{width,height,tmp1,tmp2,tmp3}
    ADD     block, sp, #FILL_BUF        ;// block
    MLA     ref, height, width, ref     ;// ref += width * height;
    MLA     block, tmp2, tmp1, block    ;// block + (chromaPW+1)*(chromaPH+1)
    BL      h264bsdFillBlock

    ;// From here on the stack copy is the reference picture: origin (0,0),
    ;// width = chromaPartWidth+1, height = chromaPartHeight+1. The saved
    ;// argument slots are rewritten because the Cr pass reloads them.
    MOV     x0, #0                      ;// x0 = 0
    MOV     y0, #0                      ;// y0 = 0
    STR     x0, [sp, #ARG_X0]
    STR     y0, [sp, #ARG_Y0]
    ADD     ref, sp, #FILL_BUF          ;// ref = block
    STR     ref, [sp, #ARG_REF]         ;// ref

    STR     tmp2, [sp, #ARG_HEIGHT]     ;// height
    STR     tmp1, [sp, #ARG_WIDTH]      ;// width
    MOV     width, tmp1

skip_fill
    MLA     tmp3, y0, width, x0         ;// tmp3 = y0*width+x0
    LDR     yFrac, [sp, #ARG_YFRAC]     ;// yFrac
    LDR     xFrac, [sp, #ARG_XFRAC]     ;// xFrac
    ADD     ptrA, ref, tmp3             ;// ptrA = ref + y0*width+x0
    RSB     valX, xFrac, #8             ;// valX = 8-xFrac
    RSB     valY, yFrac, #8             ;// valY = 8-yFrac (y0 is dead now)

    LDR     mb, [sp, #ARG_PRED]         ;// predPartChroma


    ;// pack values to count register
    ;// [31:28] loop_x (chromaPartWidth-1)
    ;// [27:24] loop_y (chromaPartHeight-1)
    ;// [23:20] chromaPartWidth-1
    ;// [19:16] chromaPartHeight-1
    ;// [15:00] nothing

    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
    ;// count shares r2 with x0, which is only zero on the fill path. Start
    ;// the packing with MOV so a stale x0 can never leak into the counter
    ;// fields (the old ADD was only correct while x0 < 65536).
    MOV     count, tmp2, LSL #16        ;// chromaPartHeight-1
    ADD     count, count, tmp2, LSL #24 ;// loop_y
    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
    AND     tmp2, count, #0x00F00000    ;// loop_x
    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
    MOV     c32, #32                    ;// rounding term; overwrites yFrac


    ;///////////////////////////////////////////////////////////////////////////
    ;// Cb
    ;///////////////////////////////////////////////////////////////////////////

    ;// 2x2 pels per iteration
    ;// bilinear vertical and horizontal interpolation

loop1_y
    ;// Left-most column of this row pair: three vertically adjacent pixels.
    LDRB    tmp1, [ptrA]
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)

    ADD     count, count, tmp2, LSL #8  ;// re-arm loop_x in [31:28]
loop1_x
    ;// first
    LDRB    tmp2, [ptrA, #1]!
    LDRB    tmp4, [ptrA, width]
    LDRB    tmp6, [ptrA, width, LSL #1]

    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|

    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5

    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
    MLA     tmp6, tmp3, valX, c32       ;// t6=t3*valX+32
    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (lower row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel (upper row)

    ;// second
    LDRB    tmp1, [ptrA, #1]!
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    MLA     tmp5, tmp1, xFrac, c32      ;// t5=t1*xFrac+32
    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5

    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
    MLA     tmp6, tmp3, xFrac, c32      ;// t6=t3*xFrac+32
    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (lower row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel (upper row)

    SUBS    count, count, #2<<28        ;// two columns done
    BCS     loop1_x                     ;// loop until loop_x borrows

    AND     tmp2, count, #0x00F00000    ;// tmp2 = (chromaPartWidth-1)<<20

    ;// Carry is clear here, so each SBC subtracts (chromaPartWidth-1)+1:
    ;// both pointers rewind to the row start and advance two rows.
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20

    ADDS    count, count, #0xE << 24    ;// loop_y -= 2 (mod 16), sign = done
    BGE     loop1_y

    ;///////////////////////////////////////////////////////////////////////////
    ;// Cr
    ;///////////////////////////////////////////////////////////////////////////
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    LDR     ref, [sp, #ARG_REF]         ;// ref
    LDR     tmp1, [sp, #ARG_Y0]         ;// y0
    LDR     tmp2, [sp, #ARG_X0]         ;// x0
    LDR     mb, [sp, #ARG_PRED]         ;// predPartChroma

    ADD     tmp1, height, tmp1          ;// second plane is 'height' rows down
    MLA     tmp3, tmp1, width, tmp2     ;// tmp3 = (height+y0)*width+x0
    ADD     ptrA, ref, tmp3
    ADD     mb, mb, #64                 ;// Cr output follows 64 bytes of Cb

    AND     count, count, #0x00FFFFFF   ;// drop the spent loop_x/loop_y
    AND     tmp1, count, #0x000F0000    ;// chromaPartHeight-1
    ADD     count, count, tmp1, LSL #8  ;// reload loop_y
    AND     tmp2, count, #0x00F00000    ;// loop_x

    ;// 2x2 pels per iteration
    ;// bilinear vertical and horizontal interpolation
loop2_y
    ;// Left-most column of this row pair: three vertically adjacent pixels.
    LDRB    tmp1, [ptrA]
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)

    ADD     count, count, tmp2, LSL #8  ;// re-arm loop_x in [31:28]
loop2_x
    ;// first
    LDRB    tmp2, [ptrA, #1]!
    LDRB    tmp4, [ptrA, width]
    LDRB    tmp6, [ptrA, width, LSL #1]

    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|

    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5

    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
    MLA     tmp6, tmp3, valX, c32       ;// t6=t3*valX+32
    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (lower row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel (upper row)

    ;// second
    LDRB    tmp1, [ptrA, #1]!
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    MLA     tmp5, tmp1, xFrac, c32      ;// t5=t1*xFrac+32
    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5

    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
    MLA     tmp6, tmp3, xFrac, c32      ;// t6=t3*xFrac+32
    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (lower row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel (upper row)

    SUBS    count, count, #2<<28        ;// two columns done
    BCS     loop2_x                     ;// loop until loop_x borrows

    AND     tmp2, count, #0x00F00000    ;// tmp2 = (chromaPartWidth-1)<<20

    ;// Carry is clear here, so each SBC subtracts (chromaPartWidth-1)+1:
    ;// both pointers rewind to the row start and advance two rows.
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20

    ADDS    count, count, #0xE << 24    ;// loop_y -= 2 (mod 16), sign = done
    BGE     loop2_y

    ;// Drop the locals plus the four saved argument registers (r0-r3), then
    ;// restore r4-r11 and return by popping the saved lr into pc.
    ADD     sp, sp, #(LOCALS_SIZE + 16)
    LDMFD   sp!, {r4-r11,pc}

    END