1; Copyright (C) 2009 The Android Open Source Project
2;
3; Licensed under the Apache License, Version 2.0 (the "License");
4; you may not use this file except in compliance with the License.
5; You may obtain a copy of the License at
6;
7;      http://www.apache.org/licenses/LICENSE-2.0
8;
9; Unless required by applicable law or agreed to in writing, software
10; distributed under the License is distributed on an "AS IS" BASIS,
11; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12; See the License for the specific language governing permissions and
13; limitations under the License.
14
15;-------------------------------------------------------------------------------
16;--
17;-- Abstract : ARMv6 optimized version horizontal part of
18;--            h264bsdInterpolateMid functions
19;--
20;-------------------------------------------------------------------------------
21
22
23    IF :DEF: H264DEC_WINASM    ;// branch taken when the symbol H264DEC_WINASM is defined
24        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
25    ELSE
26        REQUIRE8     ;// armasm: declares that this file requires 8-byte stack alignment
27        PRESERVE8    ;// armasm: declares that this file preserves 8-byte stack alignment
28    ENDIF
29
30    AREA    |.text|, CODE    ;// everything that follows is assembled into the |.text| code area
31
32
33;// Register allocation
34;// (RN binds a symbolic name to a core register; ref, mb and count are read as inputs by the routine)
35ref     RN 0    ;// pointer to current position in reference image
36mb      RN 1    ;// pointer to current position in interpolated mb
37count   RN 2    ;// bit-packed width and count values
38;// Pixel registers: UXTB16 output, two zero-extended bytes as 16-bit lanes [top, bottom]
39x_2_0   RN 4    ;// even bytes of a loaded word; [px2, px0] at the start of a row
40x_3_1   RN 5    ;// odd bytes of a loaded word;  [px3, px1] at the start of a row (also the raw LDR target)
41x_6_4   RN 6    ;// even bytes of the following word; [px6, px4] at the start of a row
42x_7_5   RN 7    ;// odd bytes of the following word;  [px7, px5] at the start of a row (also the raw LDR target)
43;// Accumulators for the four outputs y_0..y_3 computed per pass; each is stored to mb with STR
44tmp1    RN 8
45tmp2    RN 9
46tmp3    RN 10    ;// also holds the partWidth-1 field of count outside the inner loop
47tmp4    RN 11
48;// Packed 16-bit coefficient pairs, loaded once at routine entry
49mult_20_01  RN 12   ;// [20,  1]
50mult_20_m5  RN 14   ;// [20, -5]  (r14 is lr: the routine pushes it on entry and returns by popping it into pc)
51
52
53        EXPORT  h264bsdInterpolateMidHorPart    ;// make the routine's symbol visible to other object files
54
55;// Horizontal filter approach
56;//
57;// Basic idea in horizontal filtering is to adjust coefficients
58;// like below. Calculation is done with 16-bit maths.
59;//
60;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
61;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
62;// y_0 =   20  1     20 -5        -5         1
63;// y_1 =   -5        20  1      1 20        -5
64;// y_2 =    1        -5        -5 20      1 20
65;// y_3 =              1        20 -5     -5 20         1
66
67
68h264bsdInterpolateMidHorPart    ;// inputs: ref (r0), mb (r1), count (r2); no return value is set
69    STMFD   sp!, {r4-r11, lr}    ;// save r4-r11 and lr (r14 is reused below as mult_20_m5)
70    ;// Purpose: row by row, apply the 6-tap filter (1, -5, 20, 20, -5, 1) horizontally to the
71    ;// bytes read through ref and store every unclipped sum as a 32-bit word through mb.
72    ;// count layout as used by the code below:
73    ;// [31:28] loop_x: reloaded from [19:16] for every row, counted down by 4 per four outputs
74    ;// [27:20] row counter (rows-1): decremented once per row, loop exits when it drops below zero
75    ;// [19:16] partWidth-1
76    ;// [15:00] width, added to ref once per row
77    ;// NOTE(review): presumably [31:28] is 0 on entry and partWidth is a multiple of 4 (the
78    ;// row-end ADDS only clears loop_x if it wrapped to 0xF) - confirm against the caller.
79    LDR     mult_20_01, = 0x00140001    ;// halfwords [20, 1]
80    LDR     mult_20_m5, = 0x0014FFFB    ;// halfwords [20, -5] (0xFFFB = -5 as signed 16-bit)
81    AND     tmp3, count, #0x000F0000    ;// partWidth-1 (still positioned at bits [19:16])
82loop_y    ;// one iteration per row
83    LDR     x_3_1, [ref, #-8]    ;// raw word at ref-8: p0..p3 (little-endian byte order assumed)
84    ADD     count, count, tmp3, LSL #12    ;// add partWidth-1 into loop_x [31:28]
85    LDR     x_7_5, [ref, #-4]    ;// raw word at ref-4: p4..p7
86    UXTB16  x_2_0, x_3_1    ;// bits [23:16] and [7:0] -> [p2, p0]
87    UXTB16  x_3_1, x_3_1, ROR #8    ;// bits [31:24] and [15:8] -> [p3, p1]
88    UXTB16  x_6_4, x_7_5    ;// [p6, p4]
89
90loop_x    ;// each pass produces four outputs; x_7_5 still holds a raw word here
91    UXTB16  x_7_5, x_7_5, ROR #8    ;// [p7, p5]
92    ;// p0..p8 = consecutive pixels of the current window, p0 at the lowest address
93    SMUAD   tmp1, x_2_0, mult_20_01    ;// y_0  = p0 + 20*p2
94    SMULTB  tmp2, x_2_0, mult_20_m5    ;// y_1  = -5*p2
95    SMULTB  tmp3, x_2_0, mult_20_01    ;// y_2  = p2
96    SMULTB  tmp4, x_3_1, mult_20_01    ;// y_3  = p3
97
98    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1    ;// y_0 += -5*p1 + 20*p3
99    SMLAD   tmp2, x_3_1, mult_20_01, tmp2    ;// y_1 += p1 + 20*p3
100    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3    ;// y_2 += -5*p3
101    LDR     x_3_1, [ref], #4    ;// fetch the next raw word (p8..p11), then ref += 4
102    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4    ;// y_3 += -5*p4 + 20*p6
103
104    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1    ;// y_0 += -5*p4
105    SMLADX  tmp2, x_6_4, mult_20_01, tmp2    ;// y_1 += 20*p4 + p6   (X swaps the coefficient halves)
106    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3    ;// y_2 += 20*p4 - 5*p6
107    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4    ;// y_3 += 20*p5 - 5*p7
108
109    SMLABB  tmp1, x_7_5, mult_20_01, tmp1    ;// y_0 += p5           (y_0 complete)
110    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2    ;// y_1 += -5*p5        (y_1 complete)
111    UXTB16  x_2_0, x_3_1    ;// [p10, p8] from the word fetched above
112    SMLADX  tmp3, x_7_5, mult_20_01, tmp3    ;// y_2 += 20*p5 + p7   (y_2 complete)
113    SMLABB  tmp4, x_2_0, mult_20_01, tmp4    ;// y_3 += p8           (y_3 complete)
114
115    SUBS    count, count, #4<<28    ;// loop_x -= 4; carry is cleared when this borrows
116    STR     tmp1, [mb], #4    ;// store y_0 as a 32-bit word, mb += 4
117    STR     tmp2, [mb], #4    ;// store y_1
118    STR     tmp3, [mb], #4    ;// store y_2
119    STR     tmp4, [mb], #4    ;// store y_3
120    BCC     next_y    ;// borrow: that was the last group of this row
121    ;// Second half: same arithmetic for the next four outputs with the register pairs swapped;
122    UXTB16  x_3_1, x_3_1, ROR #8    ;// [p11, p9]
123    ;// x_6_4/x_7_5 now hold the first four window pixels, x_2_0/x_3_1 the following four.
124    SMUAD   tmp1, x_6_4, mult_20_01    ;// y_0  = 1*first + 20*third pixel of the new window
125    SMULTB  tmp2, x_6_4, mult_20_m5
126    SMULTB  tmp3, x_6_4, mult_20_01
127    SMULTB  tmp4, x_7_5, mult_20_01
128
129    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1
130    SMLAD   tmp2, x_7_5, mult_20_01, tmp2
131    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3
132    LDR     x_7_5, [ref], #4    ;// fetch the next raw word, then ref += 4
133    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4
134
135    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1
136    SMLADX  tmp2, x_2_0, mult_20_01, tmp2
137    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3
138    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4
139
140    SMLABB  tmp1, x_3_1, mult_20_01, tmp1
141    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2
142    UXTB16  x_6_4, x_7_5    ;// even bytes of the new word; odd bytes are unpacked at loop_x
143    SMLADX  tmp3, x_3_1, mult_20_01, tmp3
144    SMLABB  tmp4, x_6_4, mult_20_01, tmp4
145
146    SUBS    count, count, #4<<28    ;// loop_x -= 4; carry is cleared when this borrows
147    STR     tmp1, [mb], #4
148    STR     tmp2, [mb], #4
149    STR     tmp3, [mb], #4
150    STR     tmp4, [mb], #4
151    BCS     loop_x    ;// no borrow: more groups remain in this row
152    ;// Row end: step ref by width - partWidth in total (NOTE(review): width is presumably the row stride).
153next_y
154    AND     tmp3, count, #0x000F0000    ;// partWidth-1 (tmp3 was used as an accumulator in the loop)
155    SMLABB  ref, count, mult_20_01, ref   ;// +width  (count[15:0] * 1, bottom halfwords only)
156    SBC     ref, ref, tmp3, LSR #16   ;// -(partWidth-1)-1  (extra -1: carry is clear from the borrowing SUBS)
157    ADDS    count, count, #(1<<28)-(1<<20)    ;// loop_x 0xF -> 0 and row counter -= 1 in one add
158    BGE     loop_y    ;// result is negative only once the row counter was already 0
159
160    LDMFD   sp!, {r4-r11, pc}    ;// restore r4-r11 and return by loading the saved lr into pc
161
162    END
163
164