; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version horizontal part of
;--            h264bsdInterpolateMid functions
;--
;-------------------------------------------------------------------------------


    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE


;// Register allocation

ref     RN 0    ;// pointer to current position in reference image
mb      RN 1    ;// pointer to current position in interpolated mb
count   RN 2    ;// bit-packed width and count values

;// Sliding window of unpacked pixels, two 16-bit lanes per register.
;// x_a_b holds pixel a in the top halfword and pixel b in the bottom one.
x_2_0   RN 4
x_3_1   RN 5
x_6_4   RN 6
x_7_5   RN 7

;// Accumulators for four consecutive filter outputs
tmp1    RN 8
tmp2    RN 9
tmp3    RN 10   ;// also used as scratch for the partWidth-1 field
tmp4    RN 11

;// Packed 16-bit filter coefficients, [top, bottom]
mult_20_01  RN 12   ;// [20, 1]
mult_20_m5  RN 14   ;// [20, -5]


    EXPORT  h264bsdInterpolateMidHorPart

;// Horizontal filter approach
;//
;// Basic idea in horizontal filtering is to adjust coefficients
;// like below. Calculation is done with 16-bit maths.
;//
;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
;// y_0 =   20  1     20 -5        -5         1
;// y_1 =   -5        20  1      1 20        -5
;// y_2 =    1        -5        -5 20      1 20
;// y_3 =              1        20 -5     -5 20         1
;//
;// i.e. every output is the 6-tap sum
;//     y_n = x_n - 5*x_(n+1) + 20*x_(n+2) + 20*x_(n+3) - 5*x_(n+4) + x_(n+5)
;// where x_0 is the byte at [ref, #-8] at the start of a row.

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateMidHorPart
;//
;// In:   r0 (ref)   - byte pointer into the reference image; each row is
;//                    read starting 8 bytes before this address
;//       r1 (mb)    - output pointer; every result is stored as one 32-bit
;//                    word, post-incremented by 4
;//       r2 (count) - packed parameters, laid out as this routine uses them:
;//                      [31:28] loop_x   working column counter; reloaded
;//                                       for each row by ADDing the
;//                                       [19:16] field, so it is expected
;//                                       to be zero on entry
;//                      [27:20] loop_y   number of rows to filter, minus 1
;//                      [19:16]          partWidth-1
;//                      [15:00] width    added to ref once per row
;//                                       (read as a signed halfword)
;// Out:  raw filter sums written through mb; no rounding, shift or
;//       clipping is applied here.
;// Regs: r4-r11 and lr are saved and restored; r0-r2, r12 and the flags
;//       are modified.
;//
;// NOTE(review): the lane unpacking treats byte 0 of each loaded word as
;// the left-most pixel, i.e. little-endian data access - confirm.
;// NOTE(review): each pass through half of loop_x emits 4 outputs, so
;// partWidth is presumably a multiple of 4 - confirm against callers.
;// NOTE(review): ref is used with word loads at arbitrary byte offsets
;// chosen by the caller; unaligned LDR support is presumably relied
;// upon - confirm.
;//-----------------------------------------------------------------------------

h264bsdInterpolateMidHorPart
    STMFD   sp!, {r4-r11, lr}

    LDR     mult_20_01, = 0x00140001
    LDR     mult_20_m5, = 0x0014FFFB
    AND     tmp3, count, #0x000F0000    ;// partWidth-1
loop_y
    ;// Row set-up: tmp3 holds (partWidth-1)<<16 here, either from the AND
    ;// above or from the AND at next_y.
    LDR     x_3_1, [ref, #-8]           ;// pixels 0..3
    ADD     count, count, tmp3, LSL #12 ;// loop_x = partWidth-1
    LDR     x_7_5, [ref, #-4]           ;// pixels 4..7
    UXTB16  x_2_0, x_3_1                ;// [x2, x0]
    UXTB16  x_3_1, x_3_1, ROR #8        ;// [x3, x1]
    UXTB16  x_6_4, x_7_5                ;// [x6, x4]

loop_x
    UXTB16  x_7_5, x_7_5, ROR #8        ;// [x7, x5]

    ;// First half: outputs y_0..y_3 in tmp1..tmp4
    SMUAD   tmp1, x_2_0, mult_20_01         ;// y0  = 20*x2 +    x0
    SMULTB  tmp2, x_2_0, mult_20_m5         ;// y1  = -5*x2
    SMULTB  tmp3, x_2_0, mult_20_01         ;// y2  =    x2
    SMULTB  tmp4, x_3_1, mult_20_01         ;// y3  =    x3

    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y0 += 20*x3 -  5*x1
    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y1 += 20*x3 +    x1
    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y2 += -5*x3
    LDR     x_3_1, [ref], #4                ;// fetch next 4 pixels (8..11)
    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y3 += 20*x6 -  5*x4

    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y0 += -5*x4
    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y1 +=    x6 + 20*x4
    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y2 += -5*x6 + 20*x4
    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y3 += -5*x7 + 20*x5

    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y0 +=    x5
    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y1 += -5*x5
    UXTB16  x_2_0, x_3_1                    ;// [x10, x8]
    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y2 +=    x7 + 20*x5
    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y3 +=    x8

    SUBS    count, count, #4<<28            ;// loop_x -= 4, C clear on borrow
    STR     tmp1, [mb], #4
    STR     tmp2, [mb], #4
    STR     tmp3, [mb], #4
    STR     tmp4, [mb], #4
    BCC     next_y                          ;// loop_x underflowed: row done

    UXTB16  x_3_1, x_3_1, ROR #8            ;// [x11, x9]

    ;// Second half: same filter with the register pairs rotated, so that
    ;// x_6_4/x_7_5 are the left part of the window and x_2_0/x_3_1 the
    ;// right part. Outputs y_4..y_7 in tmp1..tmp4.
    SMUAD   tmp1, x_6_4, mult_20_01         ;// y4  = 20*x6 +    x4
    SMULTB  tmp2, x_6_4, mult_20_m5         ;// y5  = -5*x6
    SMULTB  tmp3, x_6_4, mult_20_01         ;// y6  =    x6
    SMULTB  tmp4, x_7_5, mult_20_01         ;// y7  =    x7

    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1   ;// y4 += 20*x7 -  5*x5
    SMLAD   tmp2, x_7_5, mult_20_01, tmp2   ;// y5 += 20*x7 +    x5
    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3   ;// y6 += -5*x7
    LDR     x_7_5, [ref], #4                ;// fetch next 4 pixels (12..15)
    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4   ;// y7 += 20*x10 - 5*x8

    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1   ;// y4 += -5*x8
    SMLADX  tmp2, x_2_0, mult_20_01, tmp2   ;// y5 +=    x10 + 20*x8
    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3   ;// y6 += -5*x10 + 20*x8
    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4   ;// y7 += -5*x11 + 20*x9

    SMLABB  tmp1, x_3_1, mult_20_01, tmp1   ;// y4 +=    x9
    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2   ;// y5 += -5*x9
    UXTB16  x_6_4, x_7_5                    ;// [x14, x12]
    SMLADX  tmp3, x_3_1, mult_20_01, tmp3   ;// y6 +=    x11 + 20*x9
    SMLABB  tmp4, x_6_4, mult_20_01, tmp4   ;// y7 +=    x12

    SUBS    count, count, #4<<28            ;// loop_x -= 4, C clear on borrow
    STR     tmp1, [mb], #4
    STR     tmp2, [mb], #4
    STR     tmp3, [mb], #4
    STR     tmp4, [mb], #4
    BCS     loop_x                          ;// no borrow: more columns left

next_y
    ;// Both ways of getting here (BCC taken, BCS not taken) arrive with
    ;// C clear, and neither AND nor SMLABB below changes C, so the SBC
    ;// subtracts one more than its operand.
    AND     tmp3, count, #0x000F0000        ;// partWidth-1
    SMLABB  ref, count, mult_20_01, ref     ;// +width
    SBC     ref, ref, tmp3, LSR #16         ;// -(partWidth-1)-1
    ;// Decrement the row counter at bit 20 and add 1 to the underflowed
    ;// loop_x nibble. While rows remain, the carry out of the row field
    ;// wraps loop_x back to 0 and the result is non-negative; after the
    ;// last row the top nibble stays set and the result is negative.
    ADDS    count, count, #(1<<28)-(1<<20)
    BGE     loop_y

    LDMFD   sp!, {r4-r11, pc}

    END
