;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
    IMPORT |vp8_validate_buffer_arm|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE, READONLY

    ; Macro for validating the write buffer position.
    ;
    ; Calls vp8_validate_buffer_arm with
    ;   r0 = $start
    ;   r1 = $pos
    ;   r2 = word at [writer, #vp8_writer_buffer_end]
    ;   r3 = word at [writer, #vp8_writer_error]
    ;
    ; Requirements on the call site:
    ;   - the vp8_writer pointer must be in r0
    ;   - $start shall not be in r1 (r1 is overwritten with $pos first)
    ;   - $pos shall not be in r2 or r3 (both are overwritten before $pos
    ;     is read)
    ;
    ; r0-r3, r12 and lr are saved and restored around the call.
    MACRO
    VALIDATE_POS $start, $pos
    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
    ldr  r2, [r0, #vp8_writer_buffer_end]
    ldr  r3, [r0, #vp8_writer_error]
    mov  r1, $pos
    mov  r0, $start
    bl   vp8_validate_buffer_arm
    pop  {r0-r3, r12, lr}
    MEND

;-----------------------------------------------------------------------
; vp8cx_pack_tokens_into_partitions_armv5
;
; Arguments:
;   r0 VP8_COMP *cpi
;   r1 unsigned char *cx_data
;   r2 const unsigned char *cx_data_end
;   r3 int num_part
;   s0 vp8_coef_encodings           (stack arg, read from [sp, #80])
;   s1 vp8_extra_bits,              (stack arg, read from [sp, #84])
;   s2 const vp8_tree_index *       (stack arg, read from [sp, #88];
;                                    used as vp8_coef_tree)
;
; Stack frame after the prologue (10 pushed registers + 40 bytes of locals,
; so the caller's stack arguments start at sp + 80):
;   [sp, #0]   stop        end of the current token list
;   [sp, #4]   b->tree     extra-bits tree for the current token
;   [sp, #8]   cx_data_end
;   [sp, #12]  remaining mb_rows counter for the current partition
;   [sp, #16]  address of the current tokenlist element
;   [sp, #20]  num_part
;   [sp, #24]  ptr         output position for the current partition
;   [sp, #28]  i           partition index
;   [sp, #32]  tokenlist element at which partition i starts
;   [sp, #36]  mb_rows
;
; Register roles inside the encode loops:
;   r0  vp8_writer for the current partition
;   r1  p (current token)
;   r2  lowvalue
;   r3  count
;   r5  range
;
; NOTE(review): in every carry-propagation loop (*_zero_while_start) the
; byte load that follows the conditional "ldrge" is itself unconditional,
; so when x < 0 it reads through whatever the base register held before.
; Presumably a carry can never be pending while w->pos == 0 - confirm.
;-----------------------------------------------------------------------

|vp8cx_pack_tokens_into_partitions_armv5| PROC
    push    {r4-r12, lr}
    sub     sp, sp, #40

    ; Compute address of cpi->common.mb_rows
    ldr     r4, _VP8_COMP_common_
    ldr     r6, _VP8_COMMON_MBrows_
    add     r4, r0, r4

    ldr     r5, [r4, r6]                ; load up mb_rows

    str     r5, [sp, #36]               ; save mb_rows
    str     r1, [sp, #24]               ; save ptr = cx_data
    str     r3, [sp, #20]               ; save num_part
    str     r2, [sp, #8]                ; save cx_data_end

    ldr     r4, _VP8_COMP_tplist_
    add     r4, r0, r4
    ldr     r7, [r4, #0]                ; dereference cpi->tp_list
    str     r7, [sp, #32]               ; store start of cpi->tp_list

    ldr     r11, _VP8_COMP_bc_          ; load up vp8_writer out of cpi
    add     r0, r0, r11

    mov     r11, #0
    str     r11, [sp, #28]              ; i

    ; On entry to numparts_loop: r11 = i, r7 = tokenlist element for row i,
    ; r0 = writer used by the previous iteration (or cpi->bc on the first).
numparts_loop
    ldr     r2, _vp8_writer_sz_         ; load up sizeof(vp8_writer)
    add     r0, r2                      ; bc[i + 1]

    ldr     r10, [sp, #24]              ; ptr
    ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
    subs    r5, r5, r11                 ; move start point with each partition
                                        ; mb_rows starts at i
    str     r5,  [sp, #12]

    ; Reset all of the VP8 Writer data for each partition that
    ; is processed.
    ; start_encode

    ldr     r3, [sp, #8]
    str     r3, [r0, #vp8_writer_buffer_end]

    mov     r2, #0                      ; vp8_writer_lowvalue
    mov     r5, #255                    ; vp8_writer_range
    mvn     r3, #23                     ; vp8_writer_count

    str     r2,  [r0, #vp8_writer_pos]
    str     r10, [r0, #vp8_writer_buffer]

    ; The flags tested here are still those of the "subs" above: none of
    ; the instructions in between update the condition flags.
    ble     end_partition               ; if (mb_rows <= 0) end partition

mb_row_loop

    ldr     r1, [r7, #tokenlist_start]
    ldr     r9, [r7, #tokenlist_stop]
    str     r9, [sp, #0]                ; save stop for later comparison
    str     r7, [sp, #16]               ; tokenlist address for next time

    b       check_p_lt_stop

    ; actual work gets done here!

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #80]               ; vp8_coef_encodings
    mov     lr, #0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #88]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop
    lsl     r12, r6, r4                 ; r12 = v << 32 - n

; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r12, r12, #1                ; bb = v >> n
    mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Propagate the carry back through already-written 0xff bytes.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 normally holds vp8_coef_tree inside token_loop, but it was
    ; used as a temporary (buffer pointer) just above, so reload it.
    ldr     r10, [sp, #88]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #84]               ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here sizeof (vp8_extra_bit_struct) == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

;   if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32
    lsl     r12, r7, r4

    mov     lr, #0                      ; i = 0

extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; v >> n
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     extra_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]
    add     r10, r10, #1
    strb    r10, [r7, r4]
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; b->tree
extra_count_lt_zero
    lsl     r2, r2, r6

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

no_extra_bits
    ; lr was reused as the tree index in extra_bits_loop, so e must be
    ; reloaded.  Only bit 0 of e is tested below.  Use the same halfword
    ; load and named field offset as the first read of p->Extra instead
    ; of a word load at a literal offset, so the test does not depend on
    ; byte order or on the field's position inside the token struct.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff
    beq     end_zero_while_loop

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]
    add     r7, r7, #1
    strb    r7, [r6, r4]
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1
    bne     end_count_zero

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop

    ; Advance by num_parts tokenlist elements and num_parts rows.
    ldr     r10, [sp, #20]              ; num_parts
    mov     r1, #TOKENLIST_SZ
    mul     r1, r10, r1

    ldr     r6, [sp, #12]               ; mb_rows
    ldr     r7, [sp, #16]               ; tokenlist address
    subs    r6, r6, r10
    add     r7, r7, r1                  ; next element in the array
    str     r6, [sp, #12]
    bgt     mb_row_loop

end_partition
    ; stop_encode: run the encode step 32 times with a fixed probability
    ; of 128 and no addition to lowvalue.
    mov     r12, #32

stop_encode_loop
    sub     r7, r5, #1                  ; range-1

    mov     r4, r7, lsl #7              ; ((range-1) * 128)

    mov     r7, #1
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero_se      ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set_se

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start_se
token_zero_while_loop_se
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start_se
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff
    beq     token_zero_while_loop_se

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set_se
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

token_count_lt_zero_se
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r12, r12, #1
    bne     stop_encode_loop

    ; The next partition's output starts right after this one's bytes.
    ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
    ldr     r12, [sp, #24]              ; ptr
    add     r12, r12, r4                ; ptr += w->pos
    str     r12, [sp, #24]

    ldr     r11, [sp, #28]              ; i
    ldr     r10, [sp, #20]              ; num_parts

    add     r11, r11, #1                ; i++
    str     r11, [sp, #28]

    ldr     r7, [sp, #32]               ; cpi->tp_list[i]
    mov     r1, #TOKENLIST_SZ
    add     r7, r7, r1                  ; next element in cpi->tp_list
    str     r7, [sp, #32]               ; cpi->tp_list[i+1]

    cmp     r10, r11
    bgt     numparts_loop

    add     sp, sp, #40
    pop     {r4-r12, pc}
    ENDP

; Literal pool: structure offsets and sizes from vp8_asm_enc_offsets.asm,
; loaded PC-relative by the code above.
_VP8_COMP_common_
    DCD     vp8_comp_common
_VP8_COMMON_MBrows_
    DCD     vp8_common_mb_rows
_VP8_COMP_tplist_
    DCD     vp8_comp_tplist
_VP8_COMP_bc_
    DCD     vp8_comp_bc
_vp8_writer_sz_
    DCD     vp8_writer_sz

    END