10c1bc742181ded4930842b46e9507372f0b1b963James Dong; Copyright (C) 2009 The Android Open Source Project
20c1bc742181ded4930842b46e9507372f0b1b963James Dong;
30c1bc742181ded4930842b46e9507372f0b1b963James Dong; Licensed under the Apache License, Version 2.0 (the "License");
40c1bc742181ded4930842b46e9507372f0b1b963James Dong; you may not use this file except in compliance with the License.
50c1bc742181ded4930842b46e9507372f0b1b963James Dong; You may obtain a copy of the License at
60c1bc742181ded4930842b46e9507372f0b1b963James Dong;
70c1bc742181ded4930842b46e9507372f0b1b963James Dong;      http://www.apache.org/licenses/LICENSE-2.0
80c1bc742181ded4930842b46e9507372f0b1b963James Dong;
90c1bc742181ded4930842b46e9507372f0b1b963James Dong; Unless required by applicable law or agreed to in writing, software
100c1bc742181ded4930842b46e9507372f0b1b963James Dong; distributed under the License is distributed on an "AS IS" BASIS,
110c1bc742181ded4930842b46e9507372f0b1b963James Dong; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
120c1bc742181ded4930842b46e9507372f0b1b963James Dong; See the License for the specific language governing permissions and
130c1bc742181ded4930842b46e9507372f0b1b963James Dong; limitations under the License.
140c1bc742181ded4930842b46e9507372f0b1b963James Dong
150c1bc742181ded4930842b46e9507372f0b1b963James Dong;-------------------------------------------------------------------------------
160c1bc742181ded4930842b46e9507372f0b1b963James Dong;--
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;-- Abstract : ARMv6 optimized version horizontal part of
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;--            h264bsdInterpolateMid functions
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;--
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;-------------------------------------------------------------------------------
210c1bc742181ded4930842b46e9507372f0b1b963James Dong
220c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Stack-alignment build attributes: emitted for every build except when
;// H264DEC_WINASM is defined.
230c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF :DEF: H264DEC_WINASM
240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
250c1bc742181ded4930842b46e9507372f0b1b963James Dong    ELSE
260c1bc742181ded4930842b46e9507372f0b1b963James Dong        REQUIRE8                ;// this code requires an 8-byte aligned stack
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        PRESERVE8               ;// this code preserves 8-byte stack alignment
280c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Everything below is placed in the |.text| code area.
300c1bc742181ded4930842b46e9507372f0b1b963James Dong    AREA    |.text|, CODE
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong
330c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Register allocation
340c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// r0-r2: the three values the routine receives on entry.
350c1bc742181ded4930842b46e9507372f0b1b963James Dongref     RN 0    ;// pointer to current position in reference image
360c1bc742181ded4930842b46e9507372f0b1b963James Dongmb      RN 1    ;// pointer to current position in interpolated mb
370c1bc742181ded4930842b46e9507372f0b1b963James Dongcount   RN 2    ;// bit-packed width and count values
380c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// r4-r7: input samples unpacked with UXTB16 so that each register holds two
;// zero-extended byte values, one per halfword. The name x_A_B means sample A
;// in the top halfword and sample B in the bottom halfword. x_3_1 and x_7_5
;// also hold the raw 32-bit word briefly, between the LDR and the unpack.
;// In the second half of the unrolled x loop the pairs swap roles
;// (x_6_4/x_7_5 become the oldest samples, x_2_0/x_3_1 the newest).
390c1bc742181ded4930842b46e9507372f0b1b963James Dongx_2_0   RN 4    ;// bytes 2 and 0 of the first loaded word
400c1bc742181ded4930842b46e9507372f0b1b963James Dongx_3_1   RN 5    ;// bytes 3 and 1 of the first loaded word
410c1bc742181ded4930842b46e9507372f0b1b963James Dongx_6_4   RN 6    ;// bytes 2 and 0 of the second loaded word
420c1bc742181ded4930842b46e9507372f0b1b963James Dongx_7_5   RN 7    ;// bytes 3 and 1 of the second loaded word
430c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// r8-r11: 32-bit accumulators for four consecutive filter outputs; they are
;// stored to mb in the order tmp1, tmp2, tmp3, tmp4. tmp3 is also used as
;// scratch for the partWidth-1 field outside the x loop.
440c1bc742181ded4930842b46e9507372f0b1b963James Dongtmp1    RN 8    ;// output y_0
450c1bc742181ded4930842b46e9507372f0b1b963James Dongtmp2    RN 9    ;// output y_1
460c1bc742181ded4930842b46e9507372f0b1b963James Dongtmp3    RN 10   ;// output y_2
470c1bc742181ded4930842b46e9507372f0b1b963James Dongtmp4    RN 11   ;// output y_3
480c1bc742181ded4930842b46e9507372f0b1b963James Dong
;// Packed 16-bit coefficient pairs (top halfword, bottom halfword).
;// Note that r14 is the link register, so lr must be saved on entry.
490c1bc742181ded4930842b46e9507372f0b1b963James Dongmult_20_01  RN 12   ;// [20,  1]
500c1bc742181ded4930842b46e9507372f0b1b963James Dongmult_20_m5  RN 14   ;// [20, -5]
510c1bc742181ded4930842b46e9507372f0b1b963James Dong
520c1bc742181ded4930842b46e9507372f0b1b963James Dong
530c1bc742181ded4930842b46e9507372f0b1b963James Dong        EXPORT  h264bsdInterpolateMidHorPart
540c1bc742181ded4930842b46e9507372f0b1b963James Dong
550c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Horizontal filter approach
560c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
570c1bc742181ded4930842b46e9507372f0b1b963James Dong;// The basic idea in horizontal filtering is to arrange the coefficients
580c1bc742181ded4930842b46e9507372f0b1b963James Dong;// as shown below. Multiplies use packed 16-bit operands; sums are 32-bit.
590c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
600c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
610c1bc742181ded4930842b46e9507372f0b1b963James Dong;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
620c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_0 =   20  1     20 -5        -5         1
630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_1 =   -5        20  1      1 20        -5
640c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_2 =    1        -5        -5 20      1 20
650c1bc742181ded4930842b46e9507372f0b1b963James Dong;// y_3 =              1        20 -5     -5 20         1
;//
;// Each row of the table lists, per register, the coefficient applied to the
;// sample in that column; e.g. y_0 = x0 - 5*x1 + 20*x2 + 20*x3 - 5*x4 + x5.
660c1bc742181ded4930842b46e9507372f0b1b963James Dong
670c1bc742181ded4930842b46e9507372f0b1b963James Dong
;//-----------------------------------------------------------------------------
;// h264bsdInterpolateMidHorPart
;//
;// Runs the 6-tap filter (1, -5, 20, 20, -5, 1) horizontally over a block of
;// bytes, four outputs at a time, and stores every raw 32-bit sum to mb.
;// No rounding, shifting or clipping is applied in this routine.
;//
;// In:   r0 (ref)   - source byte pointer; at the start of every row the two
;//                    words at ref-8 and ref-4 are loaded, later words are
;//                    read from ref with post-increment
;//       r1 (mb)    - destination pointer; one 32-bit word is stored per
;//                    output and the pointer advances by 4 each time
;//       r2 (count) - bit-packed loop counters and source row width
;// Out:  no result register is set up; r0-r2 hold their final loop values.
;// Regs: r4-r11 and lr are saved and restored; r12 and flags are overwritten.
;// Note: the word loads from ref are not aligned by this code, so unaligned
;//       LDR support is presumably required - confirm for the target.
;//-----------------------------------------------------------------------------
680c1bc742181ded4930842b46e9507372f0b1b963James Dongh264bsdInterpolateMidHorPart
690c1bc742181ded4930842b46e9507372f0b1b963James Dong    STMFD   sp!, {r4-r11, lr}           ;// lr (r14) is reused as mult_20_m5
700c1bc742181ded4930842b46e9507372f0b1b963James Dong
710c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// pack values to count register
720c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// [31:28] loop_x (partWidth-1)
730c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// [27:24] loop_y (partHeight-1)
740c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// [23:20] partWidth-1
750c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// [19:16] partHeight-1
760c1bc742181ded4930842b46e9507372f0b1b963James Dong    ;// [15:00] width
    ;//
    ;// NOTE(review): the code below reads the x reload value from bits [19:16]
    ;// (mask 0x000F0000, labelled partWidth-1) and steps the row counter at
    ;// bit 20 (ADDS with -(1<<20)). That does not line up field-for-field
    ;// with the table above - confirm against the caller's packing.
770c1bc742181ded4930842b46e9507372f0b1b963James Dong
780c1bc742181ded4930842b46e9507372f0b1b963James Dong
790c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDR     mult_20_01, = 0x00140001    ;// top halfword 20, bottom halfword 1
800c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDR     mult_20_m5, = 0x0014FFFB    ;// top halfword 20, bottom halfword -5
810c1bc742181ded4930842b46e9507372f0b1b963James Dong    AND     tmp3, count, #0x000F0000    ;// partWidth-1
    ;// ---- start of one output row; tmp3 = (partWidth-1) << 16 on entry ----
820c1bc742181ded4930842b46e9507372f0b1b963James Dongloop_y
830c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDR     x_3_1, [ref, #-8]           ;// raw word: samples 0..3
840c1bc742181ded4930842b46e9507372f0b1b963James Dong    ADD     count, count, tmp3, LSL #12 ;// add partWidth-1 into bits [31:28] (x counter)
850c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDR     x_7_5, [ref, #-4]           ;// raw word: samples 4..7
860c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_2_0, x_3_1                ;// bytes 2 and 0 -> halfwords
870c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_3_1, x_3_1, ROR #8        ;// bytes 3 and 1 -> halfwords
880c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_6_4, x_7_5                ;// bytes 2 and 0 of the second word
890c1bc742181ded4930842b46e9507372f0b1b963James Dong
    ;// ---- x loop, unrolled twice; each half produces four outputs ----
    ;// On entry x_7_5 still holds a raw word; the other three are unpacked.
900c1bc742181ded4930842b46e9507372f0b1b963James Dongloop_x
910c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_7_5, x_7_5, ROR #8        ;// bytes 3 and 1 of the second word
920c1bc742181ded4930842b46e9507372f0b1b963James Dong
    ;// First half: window starts at sample 0 (x_2_0/x_3_1 are the oldest).
930c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMUAD   tmp1, x_2_0, mult_20_01     ;// y_0  =  1*x0 + 20*x2
940c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMULTB  tmp2, x_2_0, mult_20_m5     ;// y_1  = -5*x2
950c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMULTB  tmp3, x_2_0, mult_20_01     ;// y_2  =  1*x2
960c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMULTB  tmp4, x_3_1, mult_20_01     ;// y_3  =  1*x3
970c1bc742181ded4930842b46e9507372f0b1b963James Dong
980c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y_0 += -5*x1 + 20*x3
990c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y_1 +=  1*x1 + 20*x3
1000c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y_2 += -5*x3
1010c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDR     x_3_1, [ref], #4                ;// x_3_1 is free: fetch raw samples 8..11, ref += 4
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y_3 += -5*x4 + 20*x6
1030c1bc742181ded4930842b46e9507372f0b1b963James Dong
1040c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y_0 += -5*x4
1050c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y_1 += 20*x4 +  1*x6 (X = halfwords crossed)
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y_2 += 20*x4 + -5*x6
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y_3 += 20*x5 + -5*x7
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong
1090c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y_0 +=  1*x5
1100c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y_1 += -5*x5
1110c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_2_0, x_3_1                    ;// x_2_0 now holds samples 10 and 8
1120c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y_2 += 20*x5 +  1*x7
1130c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y_3 +=  1*x8
1140c1bc742181ded4930842b46e9507372f0b1b963James Dong
1150c1bc742181ded4930842b46e9507372f0b1b963James Dong    SUBS    count, count, #4<<28        ;// x counter -= 4; C clear on borrow
1160c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp1, [mb], #4              ;// store y_0..y_3 as 32-bit words
1170c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp2, [mb], #4
1180c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp3, [mb], #4
1190c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp4, [mb], #4
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong    BCC     next_y                      ;// x counter went below zero: row finished
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong
    ;// Second half: same arithmetic with the register pairs swapped, so the
    ;// window now starts at sample 4 (x_6_4/x_7_5 are the oldest samples).
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_3_1, x_3_1, ROR #8        ;// x_3_1 now holds samples 11 and 9
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMUAD   tmp1, x_6_4, mult_20_01     ;// y_0  =  1*x4 + 20*x6
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMULTB  tmp2, x_6_4, mult_20_m5     ;// y_1  = -5*x6
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMULTB  tmp3, x_6_4, mult_20_01     ;// y_2  =  1*x6
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMULTB  tmp4, x_7_5, mult_20_01     ;// y_3  =  1*x7
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1   ;// y_0 += -5*x5 + 20*x7
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLAD   tmp2, x_7_5, mult_20_01, tmp2   ;// y_1 +=  1*x5 + 20*x7
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3   ;// y_2 += -5*x7
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDR     x_7_5, [ref], #4                ;// x_7_5 is free: fetch the next raw word, ref += 4
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4   ;// y_3 += -5*x8 + 20*x10
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1   ;// y_0 += -5*x8
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp2, x_2_0, mult_20_01, tmp2   ;// y_1 += 20*x8 +  1*x10
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3   ;// y_2 += 20*x8 + -5*x10
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4   ;// y_3 += 20*x9 + -5*x11
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp1, x_3_1, mult_20_01, tmp1   ;// y_0 +=  1*x9
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2   ;// y_1 += -5*x9
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong    UXTB16  x_6_4, x_7_5                    ;// bytes 2 and 0 of the word just loaded
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLADX  tmp3, x_3_1, mult_20_01, tmp3   ;// y_2 += 20*x9 +  1*x11
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  tmp4, x_6_4, mult_20_01, tmp4   ;// y_3 +=  1*(byte 0 of the word just loaded)
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong    SUBS    count, count, #4<<28        ;// x counter -= 4; C clear on borrow
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp1, [mb], #4              ;// store the next four outputs
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp2, [mb], #4
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp3, [mb], #4
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong    STR     tmp4, [mb], #4
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong    BCS     loop_x                      ;// no borrow: more outputs remain in this row
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong
    ;// ---- advance to the next row ----
    ;// Both paths reach here straight after a SUBS that borrowed, so C is clear.
    ;// Neither AND nor SMLABB changes C, so the SBC below subtracts one extra.
1530c1bc742181ded4930842b46e9507372f0b1b963James Dongnext_y
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong    AND     tmp3, count, #0x000F0000    ;// partWidth-1
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong    SMLABB  ref, count, mult_20_01, ref   ;// +width
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong    SBC     ref, ref, tmp3, LSR #16   ;// -(partWidth-1)-1
    ;// One ADDS both adds 1<<28 to the x field and subtracts 1 at bit 20
    ;// (the row counter); the loop continues while the signed result is >= 0.
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong    ADDS    count, count, #(1<<28)-(1<<20)
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong    BGE     loop_y
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong    LDMFD   sp!, {r4-r11, pc}           ;// restore registers; loading pc returns
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong    END
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong
164