;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_subtract_mby_armv6|
    EXPORT  |vp8_subtract_mbuv_armv6|
    EXPORT  |vp8_subtract_b_armv6|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; Common technique used by all three routines in this file
;
; Four source bytes and four predictor bytes are loaded as one word
; each. uxtb16 (with and without "ror #8") zero-extends the even and
; odd bytes into two 16-bit lanes, usub16 subtracts lane-wise, and
; pkhbt/pkhtb re-interleave the lanes so that the four 16-bit
; differences are stored in pixel order (d0, d1, d2, d3).
; Because both operands are zero-extended bytes, each 16-bit lane of
; the usub16 result is the two's complement value of (src - pred).
;
; NOTE(review): pixels are fetched with word-sized ldr; this relies on
; the pointers/strides being suitable for word loads on the target --
; confirm against the callers.
;-----------------------------------------------------------------------

;-----------------------------------------------------------------------
; vp8_subtract_b_armv6
;
; Computes src - pred for a block of 4 rows x 4 pixels and stores the
; 16-bit differences through the block's src_diff pointer.
;
; In:
;   r0    BLOCK *be     (reads base_src, src, src_diff, src_stride)
;   r1    BLOCKD *bd    (reads predictor)
;   r2    int pitch     (byte step of the predictor per row; the diff
;                        pointer steps by pitch * 2 bytes per row)
;
; Clobbers: r0-r3, flags. r4-r9 are saved and restored.
; Stack:    24 bytes (6 registers), keeps 8-byte alignment.
;-----------------------------------------------------------------------
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]
    ldr     r5, [r0, #vp8_block_src]
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff output pointer

    ldr     r3, [r4]
    ldr     r7, [r0, #vp8_block_src_stride] ; r7 = source row step
    add     r3, r3, r5          ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor] ; r8 = predictor pointer

    mov     r9, #4              ; loop count (rows)

    ; From here on r0, r1, r4 and r5 no longer hold their initial
    ; values; they are reused as scratch registers.
loop_block

    ldr     r0, [r3], r7        ; src  (4 pixels), then src  += stride
    ldr     r1, [r8], r2        ; pred (4 pixels), then pred += pitch

    uxtb16  r4, r0              ; [s2 | s0]
    uxtb16  r5, r1              ; [p2 | p0]
    uxtb16  r0, r0, ror #8      ; [s3 | s1]
    uxtb16  r1, r1, ror #8      ; [p3 | p1]

    usub16  r4, r4, r5          ; [d2 | d0]
    usub16  r5, r0, r1          ; [d3 | d1]

    subs    r9, r9, #1          ; decrement loop counter; the flags
                                ; survive until bne because nothing
                                ; below sets them

    pkhbt   r0, r4, r5, lsl #16 ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16 ; [d3 | d2]

    str     r0, [r6, #0]        ; diff
    str     r1, [r6, #4]        ; diff

    add     r6, r6, r2, lsl #1  ; update diff pointer (pitch shorts)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                  ; bx (not "mov pc, lr") so that a return
                                ; to a Thumb caller interworks on ARMv6;
                                ; matches the other routines in this file

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6
;
; Computes src - pred for two planes of 8 rows x 8 pixels (U, then V)
; and stores the 16-bit differences contiguously, starting 512 bytes
; past the incoming diff pointer. Each row produces 16 bytes of output;
; the V output follows the U output directly.
;
; In:
;   r0            short *diff
;   r1            unsigned char *usrc
;   r2            unsigned char *vsrc
;   r3            int src_stride
;   [sp, #0]      unsigned char *upred
;   [sp, #4]      unsigned char *vpred
;   [sp, #8]      int pred_stride
;   (offsets on entry; after the 32-byte register push below they are
;    read at [sp, #32], [sp, #36] and [sp, #40])
;
; Clobbers: r0-r2, r12, flags. r4-r11 are saved and restored.
; Stack:    32 bytes (8 registers), keeps 8-byte alignment.
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmfd   sp!, {r4-r11}

    add     r0, r0, #512        ; set *diff point to Cb
                                ; (512 bytes = 256 shorts; presumably
                                ; the size of the luma residual that
                                ; precedes it -- confirm with caller)
    mov     r4, #8              ; loop count (rows)
    ldr     r5, [sp, #32]       ; upred
    ldr     r12, [sp, #40]      ; pred_stride

    ; Subtract U block
    ; Each iteration handles one row as two groups of 4 pixels, (A)
    ; and (B). The loads for (B) are issued while (A) is still being
    ; packed and stored.
loop_u
    ldr     r6, [r1]            ; usrc      (A)
    ldr     r7, [r5]            ; upred     (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; usrc      (B)
    ldr     r11, [r5, #4]       ; upred     (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r1, r1, r3          ; update usrc pointer
    add     r5, r5, r12         ; update upred pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_u

    ldr     r5, [sp, #36]       ; vpred
    mov     r4, #8              ; loop count (rows)

    ; Subtract V block
    ; Same structure as the U loop; r0 carries on from where the U
    ; loop left it, so V differences follow the U differences.
loop_v
    ldr     r6, [r2]            ; vsrc      (A)
    ldr     r7, [r5]            ; vpred     (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r2, #4]       ; vsrc      (B)
    ldr     r11, [r5, #4]       ; vpred     (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r2, r2, r3          ; update vsrc pointer
    add     r5, r5, r12         ; update vpred pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_v

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6
;
; Computes src - pred for 16 rows x 16 pixels and stores the 16-bit
; differences contiguously starting at diff (32 bytes per row).
;
; In:
;   r0            short *diff
;   r1            unsigned char *src
;   r2            int src_stride
;   r3            unsigned char *pred
;   [sp, #0]      int pred_stride
;   (offset on entry; after the 32-byte register push below it is
;    read at [sp, #32])
;
; Clobbers: r0, r1, r3, r12, flags. r4-r11 are saved and restored.
; Stack:    32 bytes (8 registers), keeps 8-byte alignment.
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmfd   sp!, {r4-r11}
    ldr     r12, [sp, #32]      ; pred_stride
    mov     r4, #16             ; loop count (rows)

    ; Each iteration handles one row as four groups of 4 pixels,
    ; (A)..(D). The loads for the next group are issued while the
    ; current group is still being packed and stored.
loop_y
    ldr     r6, [r1]            ; src       (A)
    ldr     r7, [r3]            ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; src       (B)
    ldr     r11, [r3, #4]       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    ldr     r10, [r1, #8]       ; src       (C)
    ldr     r11, [r3, #8]       ; pred      (C)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    uxtb16  r8, r10             ; [s2 | s0] (C)
    str     r9, [r0], #4        ; diff      (B)

    uxtb16  r9, r11             ; [p2 | p0] (C)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (C)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (C)

    usub16  r6, r8, r9          ; [d2 | d0] (C)
    usub16  r7, r10, r11        ; [d3 | d1] (C)

    ldr     r10, [r1, #12]      ; src       (D)
    ldr     r11, [r3, #12]      ; pred      (D)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (C)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (C)

    str     r8, [r0], #4        ; diff      (C)
    uxtb16  r8, r10             ; [s2 | s0] (D)
    str     r9, [r0], #4        ; diff      (C)

    uxtb16  r9, r11             ; [p2 | p0] (D)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (D)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (D)

    usub16  r6, r8, r9          ; [d2 | d0] (D)
    usub16  r7, r10, r11        ; [d3 | d1] (D)

    add     r1, r1, r2          ; update src pointer
    add     r3, r3, r12         ; update pred pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (D)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (D)

    str     r8, [r0], #4        ; diff      (D)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (D)

    bne     loop_y

    ldmfd   sp!, {r4-r11}
    bx      lr

    ENDP

    END