; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version horizontal part of
;--            h264bsdInterpolateMid functions
;--
;-------------------------------------------------------------------------------


    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE


;// Register allocation

ref     RN 0    ;// pointer to current position in reference image
mb      RN 1    ;// pointer to current position in interpolated mb
count   RN 2    ;// bit-packed width and count values

;// Each x_H_L register holds two source pixels zero-extended to 16 bits:
;// pixel H in the top halfword and pixel L in the bottom halfword
;// (produced by UXTB16 from a 32-bit load).
x_2_0   RN 4
x_3_1   RN 5
x_6_4   RN 6
x_7_5   RN 7

tmp1    RN 8    ;// accumulator for output y_0
tmp2    RN 9    ;// accumulator for output y_1
tmp3    RN 10   ;// accumulator for output y_2 (also scratch for partWidth-1)
tmp4    RN 11   ;// accumulator for output y_3

mult_20_01  RN 12   ;// [20,  1]  top halfword 20, bottom halfword  1
mult_20_m5  RN 14   ;// [20, -5]  top halfword 20, bottom halfword -5


    EXPORT  h264bsdInterpolateMidHorPart

;// Horizontal filter approach
;//
;// Basic idea in horizontal filtering is to adjust coefficients
;// like below. Calculation is done with 16-bit maths.
;//
;//  Reg    x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
;//  y_0 =  20  1     20 -5        -5         1
;//  y_1 =  -5        20  1      1 20        -5
;//  y_2 =   1        -5        -5 20      1 20
;//  y_3 =             1        20 -5     -5 20         1
;//
;// i.e. y_k = x[k] - 5*x[k+1] + 20*x[k+2] + 20*x[k+3] - 5*x[k+4] + x[k+5]

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateMidHorPart
;//
;// Applies the 6-tap (1, -5, 20, 20, -5, 1) horizontal filter to rows of the
;// reference image and stores every filtered sample as a 32-bit word,
;// without rounding, shifting or clipping.
;//
;// In:    r0 (ref)   = reference image pointer. The first source pixel that
;//                    is read for a row is at ref - 8.
;//        r1 (mb)    = output pointer; four words are written per group of
;//                    four outputs and the pointer keeps advancing across
;//                    rows (the output is stored contiguously).
;//        r2 (count) = packed parameters, see the layout inside the function.
;// Out:   nothing in registers.
;// Saves: r4-r11 and lr are pushed on entry and restored on return.
;// Uses:  r0-r2, r12 and the flags as scratch.
;//
;// NOTE(review): presumably called from C as
;//   void h264bsdInterpolateMidHorPart(u8 *ref, i32 *mb, u32 count)
;// - confirm against the caller.
;// NOTE(review): source pixels are fetched with word-sized LDR from ref-8,
;// ref-4, ref, ...; this assumes ref is word aligned or that unaligned word
;// loads are permitted on the target - confirm.
;//-----------------------------------------------------------------------------

h264bsdInterpolateMidHorPart
    STMFD   sp!, {r4-r11, lr}

    ;// Packed fields of the count register, as the code below uses them:
    ;//   [31:28]  loop_x       column counter; reloaded for every row from
    ;//                         [19:16] and decremented by 4 per group of
    ;//                         four outputs
    ;//   [27:20]  loop_y       rows remaining minus one; decremented by one
    ;//                         per row, the routine returns when it borrows
    ;//   [19:16]  partWidth-1
    ;//   [15:00]  width        added to ref once per row (signed 16x16
    ;//                         multiply by 1, so it must fit in 15 bits)
    ;// NOTE(review): the previous version of this comment listed loop_y at
    ;// [27:24], partWidth-1 at [23:20] and partHeight-1 at [19:16]. The
    ;// masks and shifts below do not match that layout - confirm against the
    ;// code that packs count.
    ;// NOTE(review): [31:28] is assumed to be zero on entry, because the
    ;// reload below is an ADD rather than a bit-field insert - confirm.


    LDR     mult_20_01, = 0x00140001
    LDR     mult_20_m5, = 0x0014FFFB
    AND     tmp3, count, #0x000F0000    ;// partWidth-1
loop_y
    ;// Start of a row: prime the pipeline with pixels 0..7 and move
    ;// partWidth-1 (tmp3, bits [19:16]) into the loop_x field [31:28].
    LDR     x_3_1, [ref, #-8]
    ADD     count, count, tmp3, LSL #12
    LDR     x_7_5, [ref, #-4]
    UXTB16  x_2_0, x_3_1                ;// bytes 0 and 2
    UXTB16  x_3_1, x_3_1, ROR #8        ;// bytes 1 and 3
    UXTB16  x_6_4, x_7_5                ;// bytes 4 and 6

loop_x
    ;// First half: outputs y_0..y_3 from pixels 0..8, where
    ;// x_2_0/x_3_1 are the older word and x_6_4/x_7_5 the newer word.
    UXTB16  x_7_5, x_7_5, ROR #8        ;// bytes 5 and 7

    SMUAD   tmp1, x_2_0, mult_20_01     ;// y_0  = x0 + 20*x2
    SMULTB  tmp2, x_2_0, mult_20_m5     ;// y_1  = -5*x2
    SMULTB  tmp3, x_2_0, mult_20_01     ;// y_2  = x2
    SMULTB  tmp4, x_3_1, mult_20_01     ;// y_3  = x3

    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y_0 += -5*x1 + 20*x3
    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y_1 += x1 + 20*x3
    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y_2 += -5*x3
    LDR     x_3_1, [ref], #4                ;// fetch next four pixels (8..11)
    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y_3 += -5*x4 + 20*x6

    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y_0 += -5*x4
    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y_1 += 20*x4 + x6
    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y_2 += 20*x4 - 5*x6
    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y_3 += 20*x5 - 5*x7

    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y_0 += x5
    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y_1 += -5*x5
    UXTB16  x_2_0, x_3_1                    ;// bytes 8 and 10
    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y_2 += 20*x5 + x7
    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y_3 += x8

    ;// loop_x -= 4. A borrow (carry clear) means the row is complete.
    ;// The STRs do not touch the flags, so BCC still tests the SUBS.
    SUBS    count, count, #4<<28
    STR     tmp1, [mb], #4
    STR     tmp2, [mb], #4
    STR     tmp3, [mb], #4
    STR     tmp4, [mb], #4
    BCC     next_y

    ;// Second half: same filter with the register roles rotated, so
    ;// x_6_4/x_7_5 are now the older word and x_2_0/x_3_1 the newer word.
    ;// Pixel indices in the comments are relative to this group of four.
    UXTB16  x_3_1, x_3_1, ROR #8        ;// bytes 5 and 7 of this group

    SMUAD   tmp1, x_6_4, mult_20_01     ;// y_0  = x0 + 20*x2
    SMULTB  tmp2, x_6_4, mult_20_m5     ;// y_1  = -5*x2
    SMULTB  tmp3, x_6_4, mult_20_01     ;// y_2  = x2
    SMULTB  tmp4, x_7_5, mult_20_01     ;// y_3  = x3

    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1   ;// y_0 += -5*x1 + 20*x3
    SMLAD   tmp2, x_7_5, mult_20_01, tmp2   ;// y_1 += x1 + 20*x3
    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3   ;// y_2 += -5*x3
    LDR     x_7_5, [ref], #4                ;// fetch next four pixels
    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4   ;// y_3 += -5*x4 + 20*x6

    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1   ;// y_0 += -5*x4
    SMLADX  tmp2, x_2_0, mult_20_01, tmp2   ;// y_1 += 20*x4 + x6
    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3   ;// y_2 += 20*x4 - 5*x6
    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4   ;// y_3 += 20*x5 - 5*x7

    SMLABB  tmp1, x_3_1, mult_20_01, tmp1   ;// y_0 += x5
    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2   ;// y_1 += -5*x5
    UXTB16  x_6_4, x_7_5                    ;// bytes 8 and 10 of this group
    SMLADX  tmp3, x_3_1, mult_20_01, tmp3   ;// y_2 += 20*x5 + x7
    SMLABB  tmp4, x_6_4, mult_20_01, tmp4   ;// y_3 += x8

    SUBS    count, count, #4<<28            ;// loop_x -= 4
    STR     tmp1, [mb], #4
    STR     tmp2, [mb], #4
    STR     tmp3, [mb], #4
    STR     tmp4, [mb], #4
    BCS     loop_x                          ;// no borrow: more columns left

next_y
    ;// Carry is clear on both paths into this label (BCC taken, or BCS
    ;// not taken), and neither AND nor SMLABB changes it, so the SBC
    ;// below subtracts one extra: ref = ref + width - (partWidth-1) - 1.
    AND     tmp3, count, #0x000F0000    ;// partWidth-1
    SMLABB  ref, count, mult_20_01, ref   ;// +width
    SBC     ref, ref, tmp3, LSR #16   ;// -(partWidth-1)-1
    ;// +(1<<28) wraps the borrowed loop_x field back to zero and -(1<<20)
    ;// decrements the row counter. When the row counter borrows, the
    ;// result is negative and the loop ends.
    ADDS    count, count, #(1<<28)-(1<<20)
    BGE     loop_y

    LDMFD   sp!, {r4-r11, pc}

    END
