;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Symbols exchanged with the rest of the build.
    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
    IMPORT |vp8_validate_buffer_arm|

    ; Structure offsets / sizes used below as immediates and literal-pool
    ; values are expected to come from this include.
    INCLUDE vp8_asm_enc_offsets.asm

    ARM                                 ; assemble as ARM (A32) instructions
    REQUIRE8                            ; code requires an 8-byte aligned stack
    PRESERVE8                           ; code keeps the stack 8-byte aligned

    AREA    |.text|, CODE, READONLY

;-----------------------------------------------------------------------
; VALIDATE_POS $start, $pos
;
; Calls vp8_validate_buffer_arm with
;     r0 = $start
;     r1 = $pos
;     r2 = word at [r0, #vp8_writer_buffer_end]
;     r3 = word at [r0, #vp8_writer_error]
; where the two loads use the value r0 had on entry to the macro.
;
; Requirements on the caller:
;   * r0 must hold the vp8_writer pointer.
;   * $start must not be r1: r1 is overwritten with $pos before $start
;     is read.
;   * Neither $start nor $pos may be r2 or r3: both are overwritten by
;     the loads before the operands are read.
;
; r0-r3, r12 and lr are saved and restored around the call; the macro
; relies on the callee to preserve every other register.  The push is
; 6 words (24 bytes), so an 8-byte aligned sp stays 8-byte aligned.
; The macro does nothing to save the condition flags across the call.
;-----------------------------------------------------------------------
    MACRO
    VALIDATE_POS $start, $pos
    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
    ldr  r2, [r0, #vp8_writer_buffer_end]
    ldr  r3, [r0, #vp8_writer_error]
    mov  r1, $pos
    mov  r0, $start
    bl   vp8_validate_buffer_arm
    pop  {r0-r3, r12, lr}
    MEND

;-----------------------------------------------------------------------
; vp8cx_pack_tokens_into_partitions_armv5
;
; Arguments
;   r0 VP8_COMP *cpi
;   r1 unsigned char *cx_data
;   r2 const unsigned char *cx_data_end
;   r3 int num_part
;   s0 vp8_coef_encodings              (1st stack argument)
;   s1 vp8_extra_bits                  (2nd stack argument)
;   s2 const vp8_tree_index *          (3rd stack argument, coefficient tree)
;
; Outline
;   for (i = 0; i < num_part; i++)      ; body always runs at least once
;     - advance the writer pointer by sizeof(vp8_writer) and reset it so
;       that it writes at ptr (pos = 0, buffer_end = cx_data_end)
;     - for token lists tp_list[i], tp_list[i + num_part], ... while the
;       row counter (mb_rows - i, decremented by num_part) is > 0:
;         pack every token between tokenlist start and stop: tree-coded
;         token bits, then optional extra bits, then one sign bit
;     - flush by coding 32 zero bits at probability 128
;     - ptr += writer pos
;
; Stack frame after the prologue (push of 10 registers + 40 bytes)
;   [sp, #0]   stop pointer of the current token list
;   [sp, #4]   b->tree for the extra-bits loop
;   [sp, #8]   cx_data_end
;   [sp, #12]  remaining-row counter of the current partition
;   [sp, #16]  address of the current token list entry
;   [sp, #20]  num_part
;   [sp, #24]  ptr (output position for the current partition)
;   [sp, #28]  i (partition index)
;   [sp, #32]  token list entry at which partition i starts
;   [sp, #36]  mb_rows
;   [sp, #80]  s0 vp8_coef_encodings
;   [sp, #84]  s1 vp8_extra_bits
;   [sp, #88]  s2 coefficient tree
;
; Register roles inside the packing loops
;   r0  vp8_writer pointer          r1  p (current token)
;   r2  lowvalue                    r3  count
;   r5  range                       r8  n (bits left to code)
;   r9  pp (probability table)      r10 tree being walked
;   r12 v, left-aligned so the next bit is shifted out into carry
;   lr  i (tree index)
;   r4, r6, r7, r11 are scratch.
;
; NOTE(review): in each of the four carry-propagation loops
; (*_zero_while_start) the buffer pointer is loaded with ldrge but the
; following ldrb is unconditional, so when x < 0 the byte load uses
; whatever the base register held before.  Behaviour is left as-is here.
;-----------------------------------------------------------------------

|vp8cx_pack_tokens_into_partitions_armv5| PROC
    push    {r4-r12, lr}                ; 10 registers = 40 bytes
    sub     sp, sp, #40                 ; locals, see frame layout above

    ; Compute address of cpi->common.mb_rows
    ldr     r4, _VP8_COMP_common_
    ldr     r6, _VP8_COMMON_MBrows_
    add     r4, r0, r4                  ; r4 = &cpi->common

    ldr     r5, [r4, r6]                ; load up mb_rows

    str     r5, [sp, #36]               ; save mb_rows
    str     r1, [sp, #24]               ; save ptr = cx_data
    str     r3, [sp, #20]               ; save num_part
    str     r2, [sp, #8]                ; save cx_data_end

    ldr     r4, _VP8_COMP_tplist_
    add     r4, r0, r4
    ldr     r7, [r4, #0]                ; dereference cpi->tp_list
    str     r7, [sp, #32]               ; store start of cpi->tp_list

    ldr     r11, _VP8_COMP_bc_          ; load up vp8_writer out of cpi
    add     r0, r0, r11                 ; r0 = writer base inside cpi

    mov     r11, #0
    str     r11, [sp, #28]              ; i = 0

    ; On entry to numparts_loop: r0 = previous writer, r7 = first token
    ; list entry of this partition, r11 = i.
numparts_loop
    ldr     r2, _vp8_writer_sz_         ; load up sizeof(vp8_writer)
    add     r0, r2                      ; step to the next vp8_writer

    ldr     r10, [sp, #24]              ; ptr
    ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
    subs    r5, r5, r11                 ; rows left = mb_rows - i; the flags
                                        ; set here feed the ble below
    str     r5,  [sp, #12]

    ; Reset all of the VP8 Writer data for each partition that
    ; is processed.
    ; start_encode
    ; (none of the instructions up to the ble alter the flags)

    ldr     r3, [sp, #8]
    str     r3, [r0, #vp8_writer_buffer_end]

    mov     r2, #0                      ; vp8_writer_lowvalue
    mov     r5, #255                    ; vp8_writer_range
    mvn     r3, #23                     ; vp8_writer_count = -24

    str     r2,  [r0, #vp8_writer_pos]
    str     r10, [r0, #vp8_writer_buffer]

    ble     end_partition               ; if (mb_rows <= 0) end partition

mb_row_loop

    ldr     r1, [r7, #tokenlist_start]  ; p = start of this token list
    ldr     r9, [r7, #tokenlist_stop]
    str     r9, [sp, #0]                ; save stop for later comparison
    str     r7, [sp, #16]               ; tokenlist address for next time

    b       check_p_lt_stop

    ; actual work gets done here!

while_p_lt_stop
    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r4, [sp, #80]               ; vp8_coef_encodings
    mov     lr, #0                      ; i = 0
    add     r4, r4, r6, lsl #3          ; a = vp8_coef_encodings + t
                                        ; (8 bytes per entry)
    ldr     r9, [r1, #tokenextra_context_tree]   ; pp

    ldrb    r7, [r1, #tokenextra_skip_eob_node]

    ldr     r6, [r4, #vp8_token_value]  ; v
    ldr     r8, [r4, #vp8_token_len]    ; n

    ; vp8 specific skip_eob_node
    cmp     r7, #0
    movne   lr, #2                      ; i = 2
    subne   r8, r8, #1                  ; --n

    rsb     r4, r8, #32                 ; 32-n
    ldr     r10, [sp, #88]              ; vp8_coef_tree

    ; v is kept in r12 during the token pack loop
    lsl     r12, r6, r4                ; r12 = v << 32 - n

; loop start
token_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp [i>>1]
    sub     r7, r5, #1                  ; range-1

    ; Decisions are made based on the bit value shifted
    ; off of v, so set a flag here based on this.
    ; This value is referred to as "bb"
    lsls    r12, r12, #1                ; bb = next bit of v, now in carry
    mul     r6, r4, r7                  ; ((range-1) * pp[i>>1]))

    ; bb can only be 0 or 1.  So only execute this statement
    ; if bb == 1, otherwise it will act like i + 0
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = vp8_coef_tree[i+bb]
    add     r4, r7, r6, lsr #8          ; 1 + (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set

    ; Carry out of lowvalue: turn trailing 0xff bytes into 0 and add one
    ; to the byte in front of them.
    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start
token_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff                  ; while (x >= 0 && buffer[x] == 0xff)
    beq     token_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

    ; r10 was borrowed as a temporary above (zero byte / buffer
    ; pointer), so reload the coefficient tree pointer before the
    ; next trip round token_loop.
    ldr     r10, [sp, #88]              ; vp8_coef_tree

token_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     token_loop

    ldrb    r6, [r1, #tokenextra_token] ; t
    ldr     r7, [sp, #84]                ; vp8_extra_bits
    ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
    ;  element.  Here vp8_extra_bit_struct == 16
    add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t

    ldr     r4, [r12, #vp8_extra_bit_struct_base_val]
    cmp     r4, #0
    beq     skip_extra_bits

;   if( b->base_val)
    ldr     r8, [r12, #vp8_extra_bit_struct_len] ; L
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    cmp     r8, #0                      ; if( L)
    beq     no_extra_bits

    ldr     r9, [r12, #vp8_extra_bit_struct_prob]
    asr     r7, lr, #1                  ; v=e>>1

    ldr     r10, [r12, #vp8_extra_bit_struct_tree]
    str     r10, [sp, #4]               ; b->tree

    rsb     r4, r8, #32
    lsl     r12, r7, r4                 ; left-align the L bits of v

    mov     lr, #0                      ; i = 0

    ; Same bit-coding step as token_loop, walking b->tree with b->prob.
extra_bits_loop
    ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
    sub     r7, r5, #1                  ; range-1
    lsls    r12, r12, #1                ; bb = next bit of v, now in carry
    mul     r6, r4, r7                  ; (range-1) * pp[i>>1]
    addcs   lr, lr, #1                  ; i + bb

    mov     r7, #1
    ldrsb   lr, [r10, lr]               ; i = b->tree[i+bb]
    add     r4, r7, r6, lsr #8          ; split = 1 +  (((range-1) * pp[i>>1]) >> 8)

    addcs   r2, r2, r4                  ; if  (bb) lowvalue += split
    subcs   r4, r5, r4                  ; if  (bb) range = range-split

    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     extra_count_lt_zero         ; if(count >= 0)

    sub     r6, r6, r3                  ; offset= shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     extra_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       extra_zero_while_start
extra_zero_while_loop
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
extra_zero_while_start
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff                  ; while (x >= 0 && buffer[x] == 0xff)
    beq     extra_zero_while_loop

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
extra_high_bit_not_set
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
    ldr     r10, [sp, #4]               ; b->tree (r10 was used as a temp)
extra_count_lt_zero
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r8, r8, #1                  ; --n
    bne     extra_bits_loop             ; while (n)

no_extra_bits
    ; Code the low bit of e with an even split.  lr was reused as the
    ; tree index by extra_bits_loop, so e is read again here, with the
    ; same halfword access and named offset as the first read above.
    ldrsh   lr, [r1, #tokenextra_extra] ; e = p->Extra
    add     r4, r5, #1                  ; range + 1
    tst     lr, #1                      ; e & 1
    lsr     r4, r4, #1                  ; split = (range + 1) >> 1
    addne   r2, r2, r4                  ; lowvalue += split
    subne   r4, r5, r4                  ; range = range-split
    tst     r2, #0x80000000             ; lowvalue & 0x80000000
    lsl     r5, r4, #1                  ; range <<= 1
    beq     end_high_bit_not_set

    ldr     r4, [r0, #vp8_writer_pos]
    mov     r7, #0
    sub     r4, r4, #1                  ; x = w->pos - 1
    b       end_zero_while_start
end_zero_while_loop
    strb    r7, [r6, r4]                ; w->buffer[x] = 0
    sub     r4, r4, #1                  ; x--
end_zero_while_start
    cmp     r4, #0
    ldrge   r6, [r0, #vp8_writer_buffer]
    ldrb    r12, [r6, r4]
    cmpge   r12, #0xff                  ; while (x >= 0 && buffer[x] == 0xff)
    beq     end_zero_while_loop

    ldr     r6, [r0, #vp8_writer_buffer]
    ldrb    r7, [r6, r4]                ; w->buffer[x]
    add     r7, r7, #1
    strb    r7, [r6, r4]                ; w->buffer[x] + 1
end_high_bit_not_set
    adds    r3, r3, #1                  ; ++count
    lsl     r2, r2, #1                  ; lowvalue  <<= 1
    bne     end_count_zero              ; nothing to emit unless count == 0

    ldr     r4, [r0, #vp8_writer_pos]
    mvn     r3, #7                      ; count = -8
    ldr     r7, [r0, #vp8_writer_buffer]
    lsr     r6, r2, #24                 ; lowvalue >> 24
    add     r12, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r12, [r0, #vp8_writer_pos]

    VALIDATE_POS r7, r12                ; validate_buffer at pos

    strb    r6, [r7, r4]                ; w->buffer[w->pos++] = lowvalue >> 24
end_count_zero
skip_extra_bits
    add     r1, r1, #TOKENEXTRA_SZ      ; ++p
check_p_lt_stop
    ldr     r4, [sp, #0]                ; stop
    cmp     r1, r4                      ; while( p < stop)
    bcc     while_p_lt_stop             ; unsigned compare of pointers

    ; Next row of this partition: step num_parts token lists forward.
    ldr     r10, [sp, #20]              ; num_parts
    mov     r1, #TOKENLIST_SZ
    mul     r1, r10, r1                 ; num_parts * TOKENLIST_SZ

    ldr     r6, [sp, #12]               ; mb_rows
    ldr     r7, [sp, #16]               ; tokenlist address
    subs    r6, r6, r10                 ; rows left -= num_parts
    add     r7, r7, r1                  ; next element in the array
    str     r6, [sp, #12]
    bgt     mb_row_loop                 ; flags still from the subs above

end_partition
    mov     r12, #32                    ; stop_encode: 32 iterations

    ; Each iteration codes a zero bit with probability 128: range becomes
    ; split and lowvalue is only shifted.
stop_encode_loop
    sub     r7, r5, #1                  ; range-1

    mov     r4, r7, lsl #7              ; ((range-1) * 128)

    mov     r7, #1
    add     r4, r7, r4, lsr #8          ; 1 + (((range-1) * 128) >> 8)

    ; Counting the leading zeros is used to normalize range.
    clz     r6, r4
    sub     r6, r6, #24                 ; shift

    ; Flag is set on the sum of count.  This flag is used later
    ; to determine if count >= 0
    adds    r3, r3, r6                  ; count += shift
    lsl     r5, r4, r6                  ; range <<= shift
    bmi     token_count_lt_zero_se      ; if(count >= 0)

    sub     r6, r6, r3                  ; offset = shift - count
    sub     r4, r6, #1                  ; offset-1
    lsls    r4, r2, r4                  ; if((lowvalue<<(offset-1)) & 0x80000000 )
    bpl     token_high_bit_not_set_se

    ldr     r4, [r0, #vp8_writer_pos]   ; x
    sub     r4, r4, #1                  ; x = w->pos-1
    b       token_zero_while_start_se
token_zero_while_loop_se
    mov     r10, #0
    strb    r10, [r7, r4]               ; w->buffer[x] =(unsigned char)0
    sub     r4, r4, #1                  ; x--
token_zero_while_start_se
    cmp     r4, #0
    ldrge   r7, [r0, #vp8_writer_buffer]
    ldrb    r11, [r7, r4]
    cmpge   r11, #0xff                  ; while (x >= 0 && buffer[x] == 0xff)
    beq     token_zero_while_loop_se

    ldr     r7, [r0, #vp8_writer_buffer]
    ldrb    r10, [r7, r4]               ; w->buffer[x]
    add     r10, r10, #1
    strb    r10, [r7, r4]               ; w->buffer[x] + 1
token_high_bit_not_set_se
    rsb     r4, r6, #24                 ; 24-offset
    ldr     r10, [r0, #vp8_writer_buffer]
    lsr     r7, r2, r4                  ; lowvalue >> (24-offset)
    ldr     r4, [r0, #vp8_writer_pos]   ; w->pos
    lsl     r2, r2, r6                  ; lowvalue <<= offset
    mov     r6, r3                      ; shift = count
    add     r11, r4, #1                 ; w->pos++
    bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
    str     r11, [r0, #vp8_writer_pos]
    sub     r3, r3, #8                  ; count -= 8

    VALIDATE_POS r10, r11               ; validate_buffer at pos

    strb    r7, [r10, r4]               ; w->buffer[w->pos++]

token_count_lt_zero_se
    lsl     r2, r2, r6                  ; lowvalue <<= shift

    subs    r12, r12, #1
    bne     stop_encode_loop

    ; Partition finished: the next one starts right after its bytes.
    ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
    ldr     r12, [sp, #24]              ; ptr
    add     r12, r12, r4                ; ptr += w->pos
    str     r12, [sp, #24]

    ldr     r11, [sp, #28]              ; i
    ldr     r10, [sp, #20]              ; num_parts

    add     r11, r11, #1                ; i++
    str     r11, [sp, #28]

    ldr     r7, [sp, #32]               ; cpi->tp_list[i]
    mov     r1, #TOKENLIST_SZ
    add     r7, r7, r1                  ; next element in cpi->tp_list
    str     r7, [sp, #32]               ; cpi->tp_list[i+1]

    cmp     r10, r11
    bgt     numparts_loop               ; while (i < num_parts), signed

    add     sp, sp, #40                 ; drop locals
    pop     {r4-r12, pc}                ; restore and return
    ENDP

; Literal pool: structure offsets and sizes that the routine above
; fetches with PC-relative ldr instead of encoding them as immediates.
; The symbols on the DCD lines are expected to be defined by
; vp8_asm_enc_offsets.asm.
_VP8_COMP_common_
    DCD     vp8_comp_common             ; added to cpi to reach cpi->common
_VP8_COMMON_MBrows_
    DCD     vp8_common_mb_rows          ; offset of mb_rows inside common
_VP8_COMP_tplist_
    DCD     vp8_comp_tplist             ; offset of the tp_list pointer in cpi
_VP8_COMP_bc_
    DCD     vp8_comp_bc                 ; offset of the vp8_writer in cpi
_vp8_writer_sz_
    DCD     vp8_writer_sz               ; sizeof(vp8_writer)

    END
