1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12    EXPORT  |horizontal_line_4_5_scale_armv4|    ; one horizontal-line and one
13    EXPORT  |vertical_band_4_5_scale_armv4|      ; vertical-band entry point is
14    EXPORT  |horizontal_line_2_3_scale_armv4|    ; exported for each ratio:
15    EXPORT  |vertical_band_2_3_scale_armv4|      ; 4:5, 2:3, 3:5, 3:4 and 1:2
16    EXPORT  |horizontal_line_3_5_scale_armv4|
17    EXPORT  |vertical_band_3_5_scale_armv4|
18    EXPORT  |horizontal_line_3_4_scale_armv4|
19    EXPORT  |vertical_band_3_4_scale_armv4|
20    EXPORT  |horizontal_line_1_2_scale_armv4|
21    EXPORT  |vertical_band_1_2_scale_armv4|
22
23    AREA    |.text|, CODE, READONLY  ; name this block of code
24
; Register aliases shared by the routines below.
25src         RN  r0                  ; 1st argument: source pointer (horizontal routines); the vertical routines use the same alias for their dest pointer
26srcw        RN  r1                  ; 2nd argument of the horizontal routines (source_width); the vertical routines use r1 directly as the pitch
27dest        RN  r2                  ; 3rd argument of the horizontal routines; the vertical routines use r2 directly as the column count
28mask        RN  r12                 ; byte-lane mask (255 or 0xff00ff); the 2:3 routines use r12 directly for the weight 171
29c51_205     RN  r10                 ; packed weights, loaded with 0x3300cd = 51 << 16 | 205
30c102_154    RN  r11                 ; packed weights, loaded with 0x66009a = 102 << 16 | 154
31;/****************************************************************************
32; *
33; *  ROUTINE       : horizontal_line_4_5_scale_armv4
34; *
35; *  INPUTS        : const unsigned char *source : Pointer to source data.
36; *                  unsigned int source_width    : Stride of source.
37; *                  unsigned char *dest         : Pointer to destination data.
38; *                  unsigned int dest_width      : Stride of destination (NOT USED).
39; *
40; *  OUTPUTS       : None.
41; *
42; *  RETURNS       : void
43; *
44; *  FUNCTION      : Copies horizontal line of pixels from source to
45; *                  destination scaling up by 4 to 5.
46; *
47; *  SPECIAL NOTES : None.
48; *
49; ****************************************************************************/
50;void horizontal_line_4_5_scale_armv4
51;(
52;   r0 = UINT8 *source
53;   r1 = UINT32 source_width
54;   r2 = UINT8 *dest
55;   r3 = UINT32 dest_width
56;)
57|horizontal_line_4_5_scale_armv4| PROC
;   Scales a line of pixels up by 4:5. Each group of four source pixels
;   a, b, c, d, together with e (the first pixel of the next group), yields
;       a, (51a+205b+128)>>8, (102b+154c+128)>>8,
;       (154c+102d+128)>>8, (205d+51e+128)>>8
;
;   In:   r0 (src)  = source pixels, fetched a word (four pixels) at a time
;         r1 (srcw) = pixel count; reduced by 4 per loop pass, loop ends at 0
;         r2 (dest) = destination pixels, stored one byte at a time
;         r3        = dest_width in the prototype; never read, used as scratch
;   Saved/restored: r4 - r11, lr. Also overwrites r3 and r12 (mask).
;
;   Blend method: two pixels are packed as lo | hi << 16 and multiplied by a
;   packed weight pair wa << 16 | wb. Bits 16..31 of the product are then
;   wa * lo + wb * hi (at most 256 * 255, so the field cannot overflow, and
;   the low half, at most 205 * 255, cannot carry into it). The output byte
;   is bits 24..31, so round-to-nearest adds 0x800000 (128 << 16).
;
;   NOTE(review): the shifts treat bits 0..7 of a loaded word as src[0], and
;   the word loads presumably need a word-aligned source - confirm both.
;   NOTE(review): the loop only ends when srcw reaches exactly zero, and the
;   tail after it handles one further group - confirm the width callers pass.
58    stmdb   sp!, {r4 - r11, lr}
59
60    mov     mask, #255              ; mask for selecting one byte
61    ldr     c51_205, =0x3300cd      ; 51 << 16 | 205
62    ldr     c102_154, =0x66009a     ; 102 << 16 | 154
63
64    ldr     r3, [src], #4           ; first group of four pixels
65
66hl45_loop
67
68    and     r4, r3, mask            ; a = src[0]
69    and     r5, mask, r3, lsr #8    ; b = src[1]
70    strb    r4, [dest], #1          ; des[0] = a
71
72    orr     r6, r4, r5, lsl #16     ; b | a
73    and     r7, mask, r3, lsr #16   ; c = src[2]
74    mul     r6, c51_205, r6         ; a * 51 + 205 * b
75
76    orr     r5, r5, r7, lsl #16     ; c | b
77    mul     r5, c102_154, r5        ; b * 102 + 154 * c
78    add     r6, r6, #0x800000       ; + 128 in the bits 16..31 field
79    and     r8, mask, r3, lsr #24   ; d = src[3]
80    mov     r6, r6, lsr #24
81    strb    r6, [dest], #1          ; des[1]
82
83    orr     r7, r8, r7, lsl #16     ; c | d
84    mul     r7, c102_154, r7        ; c * 154 + 102 * d
85    add     r5, r5, #0x800000       ; + 128 in the bits 16..31 field
86    ldr     r3, [src], #4           ; next group of four pixels
87    mov     r5, r5, lsr #24
88    strb    r5, [dest], #1          ; des[2]
89
90    add     r7, r7, #0x800000       ; + 128 in the bits 16..31 field
91    and     r9, mask, r3            ; e = src[4]
92    orr     r9, r9, r8, lsl #16     ; d | e
93    mul     r9, c51_205, r9         ; d * 205 + 51 * e
94    mov     r7, r7, lsr #24
95    strb    r7, [dest], #1          ; des[3]
96
97    add     r9, r9, #0x800000       ; + 128 in the bits 16..31 field
98    subs    srcw, srcw, #4
99    mov     r9, r9, lsr #24
100    strb    r9, [dest], #1         ; des[4]
101
102    bne     hl45_loop
103
;   Tail: r3 already holds the next group; emit a and the three blends that
;   need only a..d. The bytes are extracted with lsr, exactly as in the loop
;   (a left shift would leave bits 0..7 zero and blend against zeros).
104    and     r4, r3, mask           ; a = src[0]
105    and     r5, mask, r3, lsr #8   ; b = src[1]
106    strb    r4, [dest], #1         ; des[0] = a
107
108    orr     r6, r4, r5, lsl #16     ; b | a
109    mul     r6, c51_205, r6        ; a * 51 + 205 * b
110
111    and     r7, mask, r3, lsr #16  ; c = src[2]
112    orr     r5, r5, r7, lsl #16     ; c | b
113    mul     r5, c102_154, r5       ; b * 102 + 154 * c
114    add     r6, r6, #0x800000      ; + 128 in the bits 16..31 field
115    and     r8, mask, r3, lsr #24  ; d = src[3]
116    mov     r6, r6, lsr #24
117    strb    r6, [dest], #1         ; des[1]
118
119    orr     r7, r8, r7, lsl #16     ; c | d
120    mul     r7, c102_154, r7       ; c * 154 + 102 * d
121    add     r5, r5, #0x800000      ; + 128 in the bits 16..31 field
122    mov     r5, r5, lsr #24
123    strb    r5, [dest], #1         ; des[2]
124
125    add     r7, r7, #0x800000      ; + 128 in the bits 16..31 field
126    mov     r7, r7, lsr #24
127    strb    r7, [dest], #1         ; des[3]
128
129    ldrb    r3, [src]              ; byte that follows the word held in r3
130    strb    r3, [dest], #1         ; des[4] is a plain copy of it
131
132    ldmia   sp!, {r4 - r11, pc}
133    ENDP    ;|horizontal_line_4_5_scale_armv4|
134
135;/****************************************************************************
136; *
137; *  ROUTINE       : vertical_band_4_5_scale_armv4
138; *
139; *  INPUTS        : unsigned char *dest    : Pointer to destination data.
140; *                  unsigned int dest_pitch : Stride of destination data.
141; *                  unsigned int dest_width : Width of destination data.
142; *
143; *  OUTPUTS       : None.
144; *
145; *  RETURNS       : void
146; *
147; *  FUNCTION      : Scales vertical band of pixels by scale 4 to 5. The
148; *                  height of the band scaled is 4-pixels.
149; *
150; *  SPECIAL NOTES : The routine uses the first line of the band below
151; *                  the current band.
152; *
153; ****************************************************************************/
154;void vertical_band_4_5_scale_armv4
155;(
156;   r0 = UINT8 *dest
157;   r1 = UINT32 dest_pitch
158;   r2 = UINT32 dest_width
159;)
160|vertical_band_4_5_scale_armv4| PROC
;   4:5 vertical upscale, done in place one column per loop pass.
;   With a, b, c, d = rows 0..3 of the column and e = row 5:
;       row 1 = (51a  + 205b + 128) >> 8
;       row 2 = (102b + 154c + 128) >> 8
;       row 3 = (154c + 102d + 128) >> 8
;       row 4 = (205d + 51e  + 128) >> 8
;   Row 0 is read but not written.
;
;   In:   r0 (src alias) = first row of the band
;         r1 = byte distance between rows
;         r2 = column count; the loop runs until it reaches zero
;   Saved/restored: r4 - r11, lr. Also overwrites r3.
;
;   Blend method: two pixels are packed as lo | hi << 16 and multiplied by a
;   packed weight pair wa << 16 | wb; bits 16..31 of the product are
;   wa * lo + wb * hi and the output byte is bits 24..31, so round-to-nearest
;   adds 0x800000 (128 << 16). Adding 0x8000 would only touch the low half.
161    stmdb   sp!, {r4 - r11, lr}
162
163    ldr     c51_205, =0x3300cd     ; 51 << 16 | 205
164    ldr     c102_154, =0x66009a    ; 102 << 16 | 154
165
166vl45_loop
167    mov     r3, src                ; r3 walks down the column for the loads
168    ldrb    r4, [r3], r1            ; a = des [0]
169    ldrb    r5, [r3], r1            ; b = des [dest_pitch]
170    ldrb    r7, [r3], r1            ; c = des[dest_pitch*2]
171    add     lr, src, r1            ; lr walks down the column for the stores, from row 1
172
173    orr     r6, r4, r5, lsl #16     ; b | a
174    mul     r6, c51_205, r6         ; a * 51 + 205 * b
175
176    ldrb    r8, [r3], r1            ; d = des[dest_pitch*3]
177    orr     r5, r5, r7, lsl #16     ; c | b
178    mul     r5, c102_154, r5        ; b * 102 + 154 * c
179    add     r6, r6, #0x800000      ; + 128 in the bits 16..31 field
180    orr     r7, r8, r7, lsl #16     ; c | d
181    mov     r6, r6, lsr #24
182    strb    r6, [lr], r1           ; row 1
183
184    ldrb    r9, [r3, r1]            ; e = des [dest_pitch * 5]
185    mul     r7, c102_154, r7        ; c * 154 + 102 * d
186    add     r5, r5, #0x800000      ; + 128 in the bits 16..31 field
187    orr     r9, r9, r8, lsl #16     ; d | e
188    mov     r5, r5, lsr #24
189    strb    r5, [lr], r1           ; row 2
190
191    mul     r9, c51_205, r9         ; d * 205 + 51 * e
192    add     r7, r7, #0x800000      ; + 128 in the bits 16..31 field
193    add     src, src, #1           ; next column
194    mov     r7, r7, lsr #24
195    strb    r7, [lr], r1           ; row 3
196
197    add     r9, r9, #0x800000      ; + 128 in the bits 16..31 field
198    subs    r2, r2, #1
199    mov     r9, r9, lsr #24
200    strb    r9, [lr], r1           ; row 4
201
202    bne     vl45_loop
203
204    ldmia   sp!, {r4 - r11, pc}
205    ENDP    ;|vertical_band_4_5_scale_armv4|
206
207;/****************************************************************************
208; *
209; *  ROUTINE       : horizontal_line_2_3_scale_armv4
210; *
211; *  INPUTS        : const unsigned char *source : Pointer to source data.
212; *                  unsigned int source_width    : Stride of source.
213; *                  unsigned char *dest         : Pointer to destination data.
214; *                  unsigned int dest_width      : Stride of destination (NOT USED).
215; *
216; *  OUTPUTS       : None.
217; *
218; *  RETURNS       : void
219; *
220; *  FUNCTION      : Copies horizontal line of pixels from source to
221; *                  destination scaling up by 2 to 3.
222; *
223; *  SPECIAL NOTES : None.
224; *
225; *
226; ****************************************************************************/
227;void horizontal_line_2_3_scale_armv4
228;(
229;   const unsigned char *source,
230;   unsigned int source_width,
231;   unsigned char *dest,
232;   unsigned int dest_width
233;)
234|horizontal_line_2_3_scale_armv4| PROC
;   Scales a line of pixels up by 2:3. Each pair a, b, together with c (the
;   first pixel of the next pair), yields
;       a, (85a + 171b + 128) >> 8, (171b + 85c + 128) >> 8
;   The tail after the loop emits a, (85a + 171b + 128) >> 8, b for one
;   further pair.
;
;   In:   r0 (src)  = source pixels
;         r1 (srcw) = pixel count; reduced by 2 per pass, loop ends at 0
;         r2 (dest) = destination pixels
;         r3        = dest_width in the prototype; never read, used as scratch
;   Saved/restored: r4 - r11, lr. Also overwrites r3 and r12.
;   NOTE(review): the loop only ends when srcw reaches exactly zero, and the
;   tail handles one pair beyond it - confirm the width callers pass.
235    stmdb   sp!, {r4 - r11, lr}
236    ldr     lr,  =85                ; weight of the nearer-by-one-third pixel
237    ldr     r12, =171               ; weight of b
238
239hl23_loop
240
241    ldrb    r3, [src], #1           ; a
242    ldrb    r4, [src], #1           ; b
243    ldrb    r5, [src]               ; c (no increment: it is the next pass's a)
244
245    strb    r3, [dest], #1          ; des[0] = a
246    mul     r4, r12, r4             ; b * 171
247    mla     r6, lr, r3, r4          ; a * 85 + b * 171
248    mla     r7, lr, r5, r4          ; c * 85 + b * 171
249
250    add     r6, r6, #128            ; round
251    mov     r6, r6, lsr #8
252    strb    r6, [dest], #1          ; des[1]
253
254    add     r7, r7, #128            ; round
255    mov     r7, r7, lsr #8
256    strb    r7, [dest], #1          ; des[2]
257
258    subs    srcw, srcw, #2
259    bne     hl23_loop
260
;   Tail: r5 still holds the last c, which is the a of the final pair, and
;   src still points at it.
261    ldrb    r4, [src, #1]           ; b
262    strb    r5, [dest], #1          ; des[0] = a
263    strb    r4, [dest, #1]          ; des[2] = b (dest now points at des[1])
264
265    mul     r4, r12, r4             ; b * 171
266    mla     r6, lr, r5, r4          ; a * 85 + b *171
267
268    add     r6, r6, #128            ; round
269    mov     r6, r6, lsr #8
270    strb    r6, [dest]              ; des[1]
271
272    ldmia   sp!, {r4 - r11, pc}
273    ENDP    ;|horizontal_line_2_3_scale_armv4|
274
275;/****************************************************************************
276; *
277; *  ROUTINE       : vertical_band_2_3_scale_armv4
278; *
279; *  INPUTS        : unsigned char *dest    : Pointer to destination data.
280; *                  unsigned int dest_pitch : Stride of destination data.
281; *                  unsigned int dest_width : Width of destination data.
282; *
283; *  OUTPUTS       : None.
284; *
285; *  RETURNS       : void
286; *
287; *  FUNCTION      : Scales vertical band of pixels by scale 2 to 3. The
288; *                  height of the band scaled is 2-pixels.
289; *
290; *  SPECIAL NOTES : The routine uses the first line of the band below
291; *                  the current band.
292; *
293; ****************************************************************************/
294;void vertical_band_2_3_scale_armv4
295;(
296;   r0 = UINT8 *dest
297;   r1 = UINT32 dest_pitch
298;   r2 = UINT32 dest_width
299;)
300|vertical_band_2_3_scale_armv4| PROC
;   2:3 vertical upscale, done in place one column per loop pass.
;   With a, b = rows 0 and 1 of the column and c = row 3:
;       row 1 = (85a  + 171b + 128) >> 8
;       row 2 = (171b + 85c  + 128) >> 8
;   Row 0 is read but not written.
;
;   In:   r0 (src alias) = first row of the band
;         r1 = byte distance between rows
;         r2 = column count; the loop runs until it reaches zero
;   Saved/restored: r4 - r8, lr. Also overwrites r3 and r12.
301    stmdb   sp!, {r4 - r8, lr}
302    ldr     lr,  =85                ; weight of a and c
303    ldr     r12, =171               ; weight of b
304    add     r3, r1, r1, lsl #1      ; 3 * dest_pitch
305
306vl23_loop
307    ldrb    r4, [src]               ; a = des [0]
308    ldrb    r5, [src, r1]           ; b = des [dest_pitch]
309    ldrb    r7, [src, r3]           ; c = des [dest_pitch*3]
310    subs    r2, r2, #1              ; flags are consumed by the bne below; nothing in between sets them
311
312    mul     r5, r12, r5             ; b * 171
313    mla     r6, lr, r4, r5          ; a * 85 + b * 171
314    mla     r8, lr, r7, r5          ; c * 85 + b * 171
315
316    add     r6, r6, #128            ; round
317    mov     r6, r6, lsr #8
318    strb    r6, [src, r1]           ; row 1
319
320    add     r8, r8, #128            ; round
321    mov     r8, r8, lsr #8
322    strb    r8, [src, r1, lsl #1]   ; row 2
323
324    add     src, src, #1            ; next column
325
326    bne     vl23_loop
327
328    ldmia   sp!, {r4 - r8, pc}
329    ENDP    ;|vertical_band_2_3_scale_armv4|
330
331;/****************************************************************************
332; *
333; *  ROUTINE       : horizontal_line_3_5_scale_armv4
334; *
335; *  INPUTS        : const unsigned char *source : Pointer to source data.
336; *                  unsigned int source_width    : Stride of source.
337; *                  unsigned char *dest         : Pointer to destination data.
338; *                  unsigned int dest_width      : Stride of destination (NOT USED).
339; *
340; *  OUTPUTS       : None.
341; *
342; *  RETURNS       : void
343; *
344; *  FUNCTION      : Copies horizontal line of pixels from source to
345; *                  destination scaling up by 3 to 5.
346; *
347; *  SPECIAL NOTES : None.
348; *
349; *
350; ****************************************************************************/
351;void horizontal_line_3_5_scale_armv4
352;(
353;   const unsigned char *source,
354;   unsigned int source_width,
355;   unsigned char *dest,
356;   unsigned int dest_width
357;)
358|horizontal_line_3_5_scale_armv4| PROC
;   Scales a line of pixels up by 3:5. Each group of three source pixels
;   a, b, c, together with d (the first pixel of the next group), yields
;       a, (102a+154b+128)>>8, (205b+51c+128)>>8,
;       (51b+205c+128)>>8, (154c+102d+128)>>8
;   The tail after the loop emits a, the same three a/b/c blends, then c.
;
;   In:   r0 (src)  = source pixels, read one byte at a time
;         r1 (srcw) = pixel count; reduced by 3 per pass, loop repeats while
;                     the result is not negative
;         r2 (dest) = destination pixels
;         r3        = dest_width in the prototype; never read
;   Saved/restored: r4 - r11, lr.
;
;   Blend method: two pixels are packed as lo | hi << 16 and multiplied by a
;   packed weight pair wa << 16 | wb; bits 16..31 of the product are
;   wa * lo + wb * hi and the output byte is bits 24..31, so round-to-nearest
;   adds 0x800000 (128 << 16). Adding 0x8000 would only touch the low half.
;
;   NOTE(review): with bpl the loop makes srcw / 3 + 1 passes before the tail
;   runs - confirm the width callers pass.
359    stmdb   sp!, {r4 - r11, lr}
360
361    ldr     c51_205, =0x3300cd     ; 51 << 16 | 205
362    ldr     c102_154, =0x66009a    ; 102 << 16 | 154
363
364    ldrb    r4, [src], #1           ; a = src[0]
365
366hl35_loop
367
368    ldrb    r8, [src], #1           ; b = src[1]
369    strb    r4, [dest], #1         ; des[0] = a
370
371    orr     r6, r4, r8, lsl #16     ; b | a
372    ldrb    r9, [src], #1           ; c = src[2]
373    mul     r6, c102_154, r6        ; a * 102 + 154 * b
374
375    orr     r5, r9, r8, lsl #16     ; b | c
376    mul     r5, c51_205, r5         ; b * 205 + 51 * c
377    add     r6, r6, #0x800000      ; + 128 in the bits 16..31 field
378    ldrb    r4, [src], #1           ; d = src[3]
379    mov     r6, r6, lsr #24
380    strb    r6, [dest], #1         ; des[1]
381
382    orr     r7, r8, r9, lsl #16     ; c | b
383    mul     r7, c51_205, r7         ; c * 205 + 51 * b
384    add     r5, r5, #0x800000      ; + 128 in the bits 16..31 field
385    mov     r5, r5, lsr #24
386    strb    r5, [dest], #1         ; des[2]
387
388    orr     r9, r4, r9, lsl #16     ; c | d
389    mul     r9, c102_154, r9        ; c * 154 + 102 * d
390    add     r7, r7, #0x800000      ; + 128 in the bits 16..31 field
391    mov     r7, r7, lsr #24
392    strb    r7, [dest], #1         ; des[3]
393
394    add     r9, r9, #0x800000      ; + 128 in the bits 16..31 field
395    subs    srcw, srcw, #3
396    mov     r9, r9, lsr #24
397    strb    r9, [dest], #1         ; des[4]
398
399    bpl     hl35_loop
400
;   Tail: r4 holds a (the d loaded by the last pass). b must go into r8,
;   the register every blend below reads; loading it anywhere else leaves
;   the previous group's b in r8.
401    ldrb    r8, [src], #1           ; b = src[1]
402    strb    r4, [dest], #1         ; des[0] = a
403
404    orr     r6, r4, r8, lsl #16     ; b | a
405    ldrb    r9, [src], #1           ; c = src[2]
406    mul     r6, c102_154, r6        ; a * 102 + 154 * b
407
408    orr     r5, r9, r8, lsl #16     ; b | c
409    mul     r5, c51_205, r5         ; b * 205 + 51 * c
410    add     r6, r6, #0x800000      ; + 128 in the bits 16..31 field
411    mov     r6, r6, lsr #24
412    strb    r6, [dest], #1         ; des[1]
413
414    orr     r7, r8, r9, lsl #16     ; c | b
415    mul     r7, c51_205, r7         ; c * 205 + 51 * b
416    add     r5, r5, #0x800000      ; + 128 in the bits 16..31 field
417    mov     r5, r5, lsr #24
418    strb    r5, [dest], #1         ; des[2]
419
420    add     r7, r7, #0x800000      ; + 128 in the bits 16..31 field
421    mov     r7, r7, lsr #24
422    strb    r7, [dest], #1         ; des[3]
423    strb    r9, [dest], #1         ; des[4] = c
424
425    ldmia   sp!, {r4 - r11, pc}
426    ENDP    ;|horizontal_line_3_5_scale_armv4|
427
428
429;/****************************************************************************
430; *
431; *  ROUTINE       : vertical_band_3_5_scale_armv4
432; *
433; *  INPUTS        : unsigned char *dest    : Pointer to destination data.
434; *                  unsigned int dest_pitch : Stride of destination data.
435; *                  unsigned int dest_width : Width of destination data.
436; *
437; *  OUTPUTS       : None.
438; *
439; *  RETURNS       : void
440; *
441; *  FUNCTION      : Scales vertical band of pixels by scale 3 to 5. The
442; *                  height of the band scaled is 3-pixels.
443; *
444; *  SPECIAL NOTES : The routine uses the first line of the band below
445; *                  the current band.
446; *
447; ****************************************************************************/
448;void vertical_band_3_5_scale_armv4
449;(
450;   r0 = UINT8 *dest
451;   r1 = UINT32 dest_pitch
452;   r2 = UINT32 dest_width
453;)
454|vertical_band_3_5_scale_armv4| PROC
;   3:5 vertical upscale, done in place one column per loop pass.
;   With a, b, c = rows 0..2 of the column and d = row 5:
;       row 1 = (102a + 154b + 128) >> 8
;       row 2 = (205b + 51c  + 128) >> 8
;       row 3 = (51b  + 205c + 128) >> 8
;       row 4 = (154c + 102d + 128) >> 8
;   Row 0 is read but not written.
;
;   In:   r0 (src alias) = first row of the band
;         r1 = byte distance between rows
;         r2 = column count; the loop runs until it reaches zero
;   Saved/restored: r4 - r11, lr. Also overwrites r3.
;
;   Blend method: two pixels are packed as lo | hi << 16 and multiplied by a
;   packed weight pair wa << 16 | wb; bits 16..31 of the product are
;   wa * lo + wb * hi and the output byte is bits 24..31, so round-to-nearest
;   adds 0x800000 (128 << 16). Adding 0x8000 would only touch the low half.
455    stmdb   sp!, {r4 - r11, lr}
456
457    ldr     c51_205, =0x3300cd     ; 51 << 16 | 205
458    ldr     c102_154, =0x66009a    ; 102 << 16 | 154
459
460vl35_loop
461    mov     r3, src                ; r3 walks down the column for the loads
462    ldrb    r4, [r3], r1            ; a = des [0]
463    ldrb    r5, [r3], r1            ; b = des [dest_pitch]
464    ldrb    r7, [r3], r1            ; c = des[dest_pitch*2]
465    add     lr, src, r1            ; lr walks down the column for the stores, from row 1
466
467    orr     r8, r4, r5, lsl #16     ; b | a
468    mul     r6, c102_154, r8        ; a * 102 + 154 * b
469
470    ldrb    r8, [r3, r1, lsl #1]    ; d = des[dest_pitch*5]
471    orr     r3, r7, r5, lsl #16     ; b | c
472    mul     r9, c51_205, r3         ; b * 205 + 51 * c
473    add     r6, r6, #0x800000      ; + 128 in the bits 16..31 field
474    orr     r3, r5, r7, lsl #16     ; c | b
475    mov     r6, r6, lsr #24
476    strb    r6, [lr], r1           ; row 1
477
478    mul     r5, c51_205, r3         ; c * 205 + 51 * b
479    add     r9, r9, #0x800000      ; + 128 in the bits 16..31 field
480    orr     r3, r8, r7, lsl #16     ; c | d
481    mov     r9, r9, lsr #24
482    strb    r9, [lr], r1           ; row 2
483
484    mul     r7, c102_154, r3        ; c * 154 + 102 * d
485    add     r5, r5, #0x800000      ; + 128 in the bits 16..31 field
486    add     src, src, #1           ; next column
487    mov     r5, r5, lsr #24
488    strb    r5, [lr], r1           ; row 3
489
490    add     r7, r7, #0x800000      ; + 128 in the bits 16..31 field
491    subs    r2, r2, #1
492    mov     r7, r7, lsr #24
493    strb    r7, [lr], r1           ; row 4
494
495
496    bne     vl35_loop
497
498    ldmia   sp!, {r4 - r11, pc}
499    ENDP    ;|vertical_band_3_5_scale_armv4|
500
501;/****************************************************************************
502; *
503; *  ROUTINE       : horizontal_line_3_4_scale_armv4
504; *
505; *  INPUTS        : const unsigned char *source : Pointer to source data.
506; *                  unsigned int source_width    : Stride of source.
507; *                  unsigned char *dest         : Pointer to destination data.
508; *                  unsigned int dest_width      : Stride of destination (NOT USED).
509; *
510; *  OUTPUTS       : None.
511; *
512; *  RETURNS       : void
513; *
514; *  FUNCTION      : Copies horizontal line of pixels from source to
515; *                  destination scaling up by 3 to 4.
516; *
517; *  SPECIAL NOTES : None.
518; *
519; *
520; ****************************************************************************/
521;void horizontal_line_3_4_scale_armv4
522;(
523;   const unsigned char *source,
524;   unsigned int source_width,
525;   unsigned char *dest,
526;   unsigned int dest_width
527;)
528|horizontal_line_3_4_scale_armv4| PROC
;   Scales a line of pixels up by 3:4. Each group of three source pixels
;   a, b, c, together with the first pixel a' of the next group, yields
;       a, (64a + 192b + 128) >> 8, (b + c + 1) >> 1, (192c + 64a' + 128) >> 8
;   The tail after the loop emits a, the first two blends, then c.
;
;   In:   r0 (src)  = source pixels, read one byte at a time
;         r1 (srcw) = pixel count; reduced by 3 per pass, loop repeats while
;                     the result is not negative
;         r2 (dest) = destination pixels
;         r3        = dest_width in the prototype; never read
;   Saved/restored: r4 - r11, lr.
;   NOTE(review): with bpl the loop makes srcw / 3 + 1 passes before the tail
;   runs - confirm the width callers pass.
529    stmdb   sp!, {r4 - r11, lr}
530
531    ldr     r10, =64                ; weight 64
532    ldr     r11, =192               ; weight 192
533    mov     r9, #128                ; rounding term
534
535    ldrb    r4, [src], #1           ; a = src[0]
536
537hl34_loop
538
539    ldrb    r8, [src], #1           ; b = src[1]
540    ldrb    r7, [src], #1           ; c = src[2]
541    strb    r4, [dest], #1          ; des[0] = a
542
543    mla     r4, r10, r4, r9         ; a*64 + 128
544    mla     r4, r11, r8, r4         ; a*64 + b*192 + 128
545
546    add     r8, r8, #1              ; b + 1
547    add     r8, r8, r7              ; b + c + 1
548    mov     r8, r8, asr #1          ; (b + c + 1) >> 1
549
550    mov     r4, r4, asr #8          ; (a*64 + b*192 + 128) >> 8
551    strb    r4, [dest], #1          ; des[1]
552
553    strb    r8, [dest], #1          ; des[2]
554
555    ldrb    r4, [src], #1           ; a' = src[3], which is the next pass's a
556
557    mla     r7, r11, r7, r9         ; c*192 + 128
558    mla     r7, r4, r10, r7         ; c*192 + a'*64 + 128
559
560    subs    srcw, srcw, #3
561
562    mov     r7, r7, asr #8          ; (c*192 + a'*64 + 128) >> 8
563    strb    r7, [dest], #1          ; des[3]
564
565    bpl     hl34_loop
566
;   Tail: r4 holds a (the a' loaded by the last pass).
567    ldrb    r8, [src], #1           ; b = src[1]
568    ldrb    r7, [src], #1           ; c = src[2]
569    strb    r4, [dest], #1          ; des[0] = a
570
571    mla     r4, r10, r4, r9         ; a*64 + 128
572    mla     r4, r11, r8, r4         ; a*64 + b*192 + 128
573    mov     r4, r4, asr #8          ; (a*64 + b*192 + 128) >> 8
574    strb    r4, [dest], #1          ; des[1]
575
576    add     r8, r8, #1              ; b + 1
577    add     r8, r8, r7              ; b + c + 1
578    mov     r8, r8, asr #1          ; (b + c + 1) >> 1
579    strb    r8, [dest], #1          ; des[2]
580    strb    r7, [dest], #1          ; des[3] = c
581
582    ldmia   sp!, {r4 - r11, pc}
583    ENDP    ;|horizontal_line_3_4_scale_armv4|
584
585
586;/****************************************************************************
587; *
588; *  ROUTINE       : vertical_band_3_4_scale_armv4
589; *
590; *  INPUTS        : unsigned char *dest    : Pointer to destination data.
591; *                  unsigned int dest_pitch : Stride of destination data.
592; *                  unsigned int dest_width : Width of destination data.
593; *
594; *  OUTPUTS       : None.
595; *
596; *  RETURNS       : void
597; *
598; *  FUNCTION      : Scales vertical band of pixels by scale 3 to 4. The
599; *                  height of the band scaled is 3-pixels.
600; *
601; *  SPECIAL NOTES : The routine uses the first line of the band below
602; *                  the current band.
603; *
604; ****************************************************************************/
605;void vertical_band_3_4_scale_armv4
606;(
607;   r0 = UINT8 *dest
608;   r1 = UINT32 dest_pitch
609;   r2 = UINT32 dest_width
610;)
611|vertical_band_3_4_scale_armv4| PROC
;   3:4 vertical upscale, done in place one column per loop pass.
;   With a, b, c = rows 0..2 of the column and a' = row 4:
;       row 1 = (64a  + 192b + 128) >> 8
;       row 2 = (b + c + 1) >> 1
;       row 3 = (192c + 64a' + 128) >> 8
;   Row 0 is read but not written.
;
;   In:   r0 (src alias) = first row of the band
;         r1 = byte distance between rows
;         r2 = column count; the loop runs until it reaches zero
;   Saved/restored: r4 - r11, lr. Also overwrites r3.
612    stmdb   sp!, {r4 - r11, lr}
613
614    ldr     r10, =64                ; weight 64
615    ldr     r11, =192               ; weight 192
616    mov     r9, #128                ; rounding term
617
618;   ldr     r1,[r1]
619vl34_loop
620    mov     r3, src                 ; r3 walks down the column for the loads
621    ldrb    r4, [r3], r1            ; a = des [0]
622    ldrb    r5, [r3], r1            ; b = des [dest_pitch]
623    ldrb    r7, [r3], r1            ; c = des [dest_pitch*2]
624    add     lr, src, r1             ; lr walks down the column for the stores, from row 1
625
626    mla     r4, r10, r4, r9         ; a*64 + 128
627    mla     r4, r11, r5, r4         ; a*64 + b*192 + 128
628
629    add     r5, r5, #1              ; b + 1
630    add     r5, r5, r7              ; b + c + 1
631    mov     r5, r5, asr #1          ; (b + c + 1) >> 1
632
633    mov     r4, r4, asr #8          ; (a*64 + b*192 + 128) >> 8
634    strb    r4, [lr], r1            ; row 1
635
636    ldrb    r4, [r3, r1]            ; a' = des [dest_pitch*4]
637
638    strb    r5, [lr], r1            ; row 2
639
640    mla     r7, r11, r7, r9         ; c*192 + 128
641    mla     r7, r4, r10, r7         ; c*192 + a'*64 + 128
642    mov     r7, r7, asr #8          ; (c*192 + a'*64 + 128) >> 8
643
644    add     src, src, #1            ; next column
645    subs    r2, r2, #1
646
647    strb    r7, [lr]                ; row 3
648
649    bne     vl34_loop
650
651    ldmia   sp!, {r4 - r11, pc}
652    ENDP    ;|vertical_band_3_4_scale_armv4|
653
654;/****************************************************************************
655; *
656; *  ROUTINE       : horizontal_line_1_2_scale_armv4
657; *
658; *  INPUTS        : const unsigned char *source : Pointer to source data.
659; *                  unsigned int source_width    : Stride of source.
660; *                  unsigned char *dest         : Pointer to destination data.
661; *                  unsigned int dest_width      : Stride of destination (NOT USED).
662; *
663; *  OUTPUTS       : None.
664; *
665; *  RETURNS       : void
666; *
667; *  FUNCTION      : Copies horizontal line of pixels from source to
668; *                  destination scaling up by 1 to 2.
669; *
670; *  SPECIAL NOTES : None.
671; *
672; ****************************************************************************/
673;void horizontal_line_1_2_scale_armv4
674;(
675;   const unsigned char *source,
676;   unsigned int source_width,
677;   unsigned char *dest,
678;   unsigned int dest_width
679;)
680|horizontal_line_1_2_scale_armv4| PROC
;   Scales a line of pixels up by 1:2. Every source pixel a with successor b
;   yields the pair a, (a + b + 1) >> 1; the last pixel is written twice.
;
;   In:   r0 (src)  = source pixels, read one byte at a time
;         r1 (srcw) = pixel count; one is taken off up front so the loop
;                     stops one pixel short and the tail handles the last
;         r2 (dest) = destination pixels, written a halfword (pair) at a time
;         r3        = dest_width in the prototype; never read, used as scratch
;   Saved/restored: r4 - r5, lr. Also overwrites r3.
;   NOTE(review): the pair is built as first | second << 8 and stored with
;   strh, which presumably needs a little-endian target and a
;   halfword-aligned dest - confirm.
;   NOTE(review): src[1] is read unconditionally and the loop only ends when
;   the count reaches exactly zero, so srcw is presumably at least 2 - confirm.
681    stmdb   sp!, {r4 - r5, lr}
682
683    sub     srcw, srcw, #1          ; leave the last pixel for the tail
684
685    ldrb    r3, [src], #1           ; a = src[0]
686    ldrb    r4, [src], #1           ; b = src[1]
687hl12_loop
688    subs    srcw, srcw, #1          ; flags are used by ldrneb and bne below
689
690    add     r5, r3, r4              ; a + b
691    add     r5, r5, #1              ; a + b + 1
692    mov     r5, r5, lsr #1          ; (a + b + 1) >> 1
693
694    orr     r5, r3, r5, lsl #8      ; a in the low byte, average in the high byte
695    strh    r5, [dest], #2          ; emit both output pixels at once
696
697    mov     r3, r4                  ; b becomes the next pass's a
698
699    ldrneb  r4, [src], #1           ; fetch the next b only if another pass follows
700    bne     hl12_loop
701
702    orr     r5, r4, r4, lsl #8      ; last pixel, duplicated
703    strh    r5, [dest]
704
705    ldmia   sp!, {r4 - r5, pc}
706    ENDP    ;|horizontal_line_1_2_scale_armv4|
707
708;/****************************************************************************
709; *
710; *  ROUTINE       : vertical_band_1_2_scale_armv4
711; *
712; *  INPUTS        : unsigned char *dest    : Pointer to destination data.
713; *                  unsigned int dest_pitch : Stride of destination data.
714; *                  unsigned int dest_width : Width of destination data.
715; *
716; *  OUTPUTS       : None.
717; *
718; *  RETURNS       : void
719; *
720; *  FUNCTION      : Scales vertical band of pixels by scale 1 to 2. The
721; *                  height of the band scaled is 1-pixel.
722; *
723; *  SPECIAL NOTES : The routine uses the first line of the band below
724; *                  the current band.
725; *
726; ****************************************************************************/
727;void vertical_band_1_2_scale_armv4
728;(
729;   r0 = UINT8 *dest
730;   r1 = UINT32 dest_pitch
731;   r2 = UINT32 dest_width
732;)
733|vertical_band_1_2_scale_armv4| PROC
;   1:2 vertical upscale, done in place four columns per loop pass.
;   For each column, with a = row 0 and b = row 2:
;       row 1 = (a + b + 1) >> 1
;   Rows 0 and 2 are read but not written.
;
;   In:   r0 (src alias) = first row of the band, accessed with word loads
;         r1 = byte distance between rows
;         r2 = column count; reduced by 4 per pass, loop repeats while the
;              result is not negative
;   Saved/restored: r4 - r7, lr. Also overwrites r3 and r12 (mask).
;
;   Method: a word holds four pixels. Bytes 0 and 2 are averaged in one
;   register and bytes 1 and 3 (shifted down by 8) in another, so each 9-bit
;   sum has room in its 16-bit lane; the two results are then re-interleaved.
;
;   NOTE(review): with bpl a column count that is a multiple of 4 gets one
;   extra pass (four columns past the count) - confirm this is intended.
;   NOTE(review): the word loads/stores presumably need r0 and r1 to be
;   multiples of 4 - confirm.
734    stmdb   sp!, {r4 - r7, lr}
735
736    ldr     mask, =0xff00ff             ; mask for selection
737    ldr     lr, = 0x010001              ; +1 rounding term for each selected byte
738
739vl12_loop
740    mov     r3, src
741    ldr     r4, [r3], r1                ; row 0; r3 now points at row 1
742    ldr     r5, [r3, r1]                ; row 2
743
744    add     src, src, #4                ; next four columns
745    subs    r2, r2, #4                  ; flags are consumed by the bpl below; nothing in between sets them
746
747    and     r6, r4, mask                ; bytes 0 and 2 of row 0
748    and     r7, r5, mask                ; bytes 0 and 2 of row 2
749
750    add     r6, r7, r6                  ; a + b per lane
751    add     r6, r6, lr                  ; a + b + 1 per lane
752
753    and     r4, mask, r4, lsr #8        ; bytes 1 and 3 of row 0
754    and     r5, mask, r5, lsr #8        ; bytes 1 and 3 of row 2
755
756    mov     r6, r6, lsr #1              ; halve both lanes at once
757    and     r6, r6, mask                ; drop the bit shifted in from the upper lane
758
759    add     r4, r5, r4                  ; a + b per lane
760    add     r4, r4, lr                  ; a + b + 1 per lane
761
762    mov     r4, r4, lsr #1              ; halve both lanes at once
763    and     r4, r4, mask                ; drop the bit shifted in from the upper lane
764
765    orr     r5, r6, r4, lsl #8          ; re-interleave bytes 0,2 with bytes 1,3
766
767    str     r5, [r3]                    ; row 1
768
769    bpl     vl12_loop
770
771    ldmia   sp!, {r4 - r7, pc}
772    ENDP    ;|vertical_band_1_2_scale_armv4|
773
774    END
775