;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_fast_quantize_b_armv6|

    INCLUDE vp8_asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_fast_quantize_b_armv6
;
; Quantizes and dequantizes 16 coefficients (4 per loop iteration, 4
; iterations), then derives the end-of-block value.
;
; In:
;   r0  BLOCK *b    - supplies the coeff, quant_fast and round pointers
;   r1  BLOCKD *d   - supplies the qcoeff, dqcoeff, dequant and eob
;                     pointers
;
; Per coefficient z (two 16-bit lanes are handled per register):
;   sz     = (z < 0) ? -1 : 0           (ssat16 #1)
;   x      = (z ^ sz) - sz              (absolute value)
;   y      = ((x + round) * quant) >> 16
;   qcoeff = (y ^ sz) - sz              (sign restored)
;   dqcoeff= low 16 bits of qcoeff * dequant
;
; Out:
;   qcoeff[0..15] and dqcoeff[0..15] are written; one byte holding eob
;   is stored through the pointer read from d's eob field. eob is 0 when
;   every quantized coefficient is zero, otherwise i + 1 where i is the
;   last scan position (see the rc/i comments in PART 2) holding a
;   nonzero qcoeff.
;
; Register roles inside the loop:
;   r3 coeff ptr, r4 quant_fast ptr, r5 round ptr, r6 qcoeff ptr,
;   r7 dqcoeff ptr, r8 dequant ptr
;   r2 loop counter / flag source, r1 nonzero flags:
;      bit n of r1 is set when the n-th 32-bit word of qcoeff
;      (coefficients 2n and 2n+1) is nonzero.
;   r0, r9-r12, lr are scratch. r0 (BLOCK *) is dead after the initial
;   pointer loads. r1 (BLOCKD *) is pushed first so that it can be
;   reloaded from [sp, #0] in PART 2.
;-----------------------------------------------------------------------
; r0    BLOCK *b
; r1    BLOCKD *d
|vp8_fast_quantize_b_armv6| PROC
    stmfd   sp!, {r1, r4-r11, lr}       ; r1 is saved only to recover d later

    ldr     r3, [r0, #vp8_block_coeff]      ; coeff
    ldr     r4, [r0, #vp8_block_quant_fast] ; quant_fast
    ldr     r5, [r0, #vp8_block_round]      ; round
    ldr     r6, [r1, #vp8_blockd_qcoeff]    ; qcoeff
    ldr     r7, [r1, #vp8_blockd_dqcoeff]   ; dqcoeff
    ldr     r8, [r1, #vp8_blockd_dequant]   ; dequant

    ldr     r2, loop_count          ; loop_count=0x1000000. 'lsls' instruction
                                    ; is used to update the counter so that
                                    ; it can be used to mark nonzero
                                    ; quantized coefficient pairs.

    mov     r1, #0                  ; flags for quantized coeffs

    ; PART 1: quantization and dequantization loop
loop
    ldr     r9, [r3], #4            ; [z1 | z0]
    ldr     r10, [r5], #4           ; [r1 | r0]
    ldr     r11, [r4], #4           ; [q1 | q0]

    ssat16  lr, #1, r9              ; [sz1 | sz0]
    eor     r9, r9, lr              ; [z1 ^ sz1 | z0 ^ sz0]
    ssub16  r9, r9, lr              ; x = (z ^ sz) - sz
    sadd16  r9, r9, r10             ; [x1+r1 | x0+r0]

    ldr     r12, [r3], #4           ; [z3 | z2]

    smulbb  r0, r9, r11             ; [(x0+r0)*q0]
    smultt  r9, r9, r11             ; [(x1+r1)*q1]

    ldr     r10, [r5], #4           ; [r3 | r2]

    ssat16  r11, #1, r12            ; [sz3 | sz2]
    eor     r12, r12, r11           ; [z3 ^ sz3 | z2 ^ sz2]
    pkhtb   r0, r9, r0, asr #16     ; [y1 | y0]
    ldr     r9, [r4], #4            ; [q3 | q2]
    ssub16  r12, r12, r11           ; x = (z ^ sz) - sz

    sadd16  r12, r12, r10           ; [x3+r3 | x2+r2]

    eor     r0, r0, lr              ; [(y1 ^ sz1) | (y0 ^ sz0)]

    smulbb  r10, r12, r9            ; [(x2+r2)*q2]
    smultt  r12, r12, r9            ; [(x3+r3)*q3]

    ssub16  r0, r0, lr              ; x = (y ^ sz) - sz

    cmp     r0, #0                  ; check if zero
    orrne   r1, r1, r2, lsr #24     ; add flag for nonzero coeffs

    str     r0, [r6], #4            ; *qcoeff++ = x
    ldr     r9, [r8], #4            ; [dq1 | dq0]

    pkhtb   r10, r12, r10, asr #16  ; [y3 | y2]
    eor     r10, r10, r11           ; [(y3 ^ sz3) | (y2 ^ sz2)]
    ssub16  r10, r10, r11           ; x = (y ^ sz) - sz

    cmp     r10, #0                 ; check if zero
    orrne   r1, r1, r2, lsr #23     ; add flag for nonzero coeffs

    str     r10, [r6], #4           ; *qcoeff++ = x
    ldr     r11, [r8], #4           ; [dq3 | dq2]

    smulbb  r12, r0, r9             ; [x0*dq0]
    smultt  r0, r0, r9              ; [x1*dq1]

    smulbb  r9, r10, r11            ; [x2*dq2]
    smultt  r10, r10, r11           ; [x3*dq3]

    lsls    r2, r2, #2              ; update loop counter; Z is set once the
                                    ; single bit is shifted out (4th pass)
    strh    r12, [r7, #0]           ; dqcoeff[0] = [x0*dq0]
    strh    r0, [r7, #2]            ; dqcoeff[1] = [x1*dq1]
    strh    r9, [r7, #4]            ; dqcoeff[2] = [x2*dq2]
    strh    r10, [r7, #6]           ; dqcoeff[3] = [x3*dq3]
    add     r7, r7, #8              ; dqcoeff += 4 (8 bytes)
    bne     loop                    ; flags still come from 'lsls' above

    ; PART 2: check position for eob...
    ldr     r11, [sp, #0]           ; restore BLOCKD pointer
    mov     lr, #0                  ; init eob
    cmp     r1, #0                  ; coeffs after quantization?
    ldr     r12, [r11, #vp8_blockd_eob]
    beq     end                     ; skip eob calculations if all zero

    ldr     r0, [r11, #vp8_blockd_qcoeff]

    ; check shortcut for nonzero qcoeffs
    ; Each flag bit covers one pair of coefficients; the bits are tested
    ; in the order that lets the scan below start at the highest scan
    ; position that can still be nonzero.
    tst     r1, #0x80               ; rc 14, 15
    bne     quant_coeff_15_14
    tst     r1, #0x20               ; rc 10, 11
    bne     quant_coeff_13_11
    tst     r1, #0x8                ; rc 6, 7
    bne     quant_coeff_12_7
    tst     r1, #0x40               ; rc 12, 13
    bne     quant_coeff_10_9
    tst     r1, #0x10               ; rc 8, 9
    bne     quant_coeff_8_3
    tst     r1, #0x2                ; rc 2, 3
    bne     quant_coeff_6_5
    tst     r1, #0x4                ; rc 4, 5
    bne     quant_coeff_4_2
    b       quant_coeff_1_0

    ; From here on: rc = coefficient index into qcoeff (byte offset 2*rc),
    ; i = scan position; lr is preset to i + 1 before each test so that
    ; 'bne end' leaves the final eob in lr.
quant_coeff_15_14
    ldrh    r2, [r0, #30]           ; rc=15, i=15
    mov     lr, #16
    cmp     r2, #0
    bne     end

    ldrh    r3, [r0, #28]           ; rc=14, i=14
    mov     lr, #15
    cmp     r3, #0
    bne     end

quant_coeff_13_11
    ldrh    r2, [r0, #22]           ; rc=11, i=13
    mov     lr, #14
    cmp     r2, #0
    bne     end

quant_coeff_12_7
    ldrh    r3, [r0, #14]           ; rc=7,  i=12
    mov     lr, #13
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #20]           ; rc=10, i=11
    mov     lr, #12
    cmp     r2, #0
    bne     end

quant_coeff_10_9
    ldrh    r3, [r0, #26]           ; rc=13, i=10
    mov     lr, #11
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #24]           ; rc=12, i=9
    mov     lr, #10
    cmp     r2, #0
    bne     end

quant_coeff_8_3
    ldrh    r3, [r0, #18]           ; rc=9,  i=8
    mov     lr, #9
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #12]           ; rc=6,  i=7
    mov     lr, #8
    cmp     r2, #0
    bne     end

quant_coeff_6_5
    ldrh    r3, [r0, #6]            ; rc=3,  i=6
    mov     lr, #7
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #4]            ; rc=2,  i=5
    mov     lr, #6
    cmp     r2, #0
    bne     end

quant_coeff_4_2
    ldrh    r3, [r0, #10]           ; rc=5,  i=4
    mov     lr, #5
    cmp     r3, #0
    bne     end

    ldrh    r2, [r0, #16]           ; rc=8,  i=3
    mov     lr, #4
    cmp     r2, #0
    bne     end

    ldrh    r3, [r0, #8]            ; rc=4,  i=2
    mov     lr, #3
    cmp     r3, #0
    bne     end

quant_coeff_1_0
    ldrh    r2, [r0, #2]            ; rc=1,  i=1
    mov     lr, #2
    cmp     r2, #0
    bne     end

    mov     lr, #1                  ; rc=0,  i=0

end
    strb    lr, [r12]               ; store eob as a byte via d's eob pointer
    ldmfd   sp!, {r1, r4-r11, pc}

    ENDP

loop_count
    DCD     0x1000000

    END
