1;
2;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12    EXPORT  |vp8_sad16x16_armv6|    ; make the routine's symbol visible outside this file
13
14    ARM                             ; assemble what follows as ARM (not Thumb) instructions
15    REQUIRE8                        ; declare that this code requires 8-byte stack alignment
16    PRESERVE8                       ; declare that this code preserves 8-byte stack alignment
17
18    AREA ||.text||, CODE, READONLY, ALIGN=2    ; read-only code section, aligned to 2^2 = 4 bytes
19
;-----------------------------------------------------------------------
; vp8_sad16x16_armv6
;
; Sum of absolute differences between a 16x16 block of bytes at src_ptr
; and a 16x16 block of bytes at ref_ptr.
;
; In:   r0      const unsigned char *src_ptr
;       r1      int  src_stride
;       r2      const unsigned char *ref_ptr
;       r3      int  ref_stride
;       stack   max_sad (never read by this routine)
; Out:  r0      accumulated sad
;
; Register roles inside the loop:
;       r4              running sad total
;       r5              passes remaining (two rows per pass)
;       r6, r7, r10, r11   source words of the current row
;       r8, r9, r12, lr    reference words of the current row
;       r8              doubles as a second partial sum once its
;                       reference word has been consumed
;
; r4-r12 and lr are saved on entry and restored on exit.
;
; NOTE(review): rows are read with plain word loads at whatever
; alignment the caller's pointers and strides produce - presumably this
; relies on unaligned word access being available; confirm for target.
;-----------------------------------------------------------------------
|vp8_sad16x16_armv6| PROC
    push    {r4-r12, lr}

    mov     r4, #0                  ; total = 0
    mov     r5, #8                  ; 8 passes x 2 rows = 16 rows

    ; Prefetch hints for one and two strides past each starting row.
    pld     [r0, r1]
    pld     [r2, r3]
    pld     [r0, r1, lsl #1]
    pld     [r2, r3, lsl #1]

sad16x16_row_pair
    ; ---- first row of the pair ----
    ldr     r6, [r0]                ; src bytes  0..3
    ldr     r8, [r2]                ; ref bytes  0..3
    ldr     r7, [r0, #4]            ; src bytes  4..7
    ldr     r9, [r2, #4]            ; ref bytes  4..7
    ldr     r10, [r0, #8]           ; src bytes  8..11
    ldr     r11, [r0, #12]          ; src bytes 12..15

    usada8  r4, r6, r8, r4          ; total += sad(bytes 0..3)
    usad8   r8, r7, r9              ; side = sad(bytes 4..7); r8's ref word is spent

    ldr     r12, [r2, #8]           ; ref bytes  8..11
    ldr     lr, [r2, #12]           ; ref bytes 12..15

    add     r0, r0, r1              ; src_ptr += src_stride
    add     r2, r2, r3              ; ref_ptr += ref_stride

    pld     [r0, r1, lsl #1]        ; hint: two strides past the new src row
    pld     [r2, r3, lsl #1]        ; hint: two strides past the new ref row

    usada8  r4, r10, r12, r4        ; total += sad(bytes 8..11)
    usada8  r8, r11, lr, r8         ; side  += sad(bytes 12..15)

    ; Source words of the second row are fetched before the side sum is
    ; folded in; r8 must not be reloaded until after that add.
    ldr     r6, [r0]                ; src bytes  0..3
    ldr     r7, [r0, #4]            ; src bytes  4..7
    add     r4, r4, r8              ; total += side

    ; ---- second row of the pair ----
    ldr     r8, [r2]                ; ref bytes  0..3
    ldr     r9, [r2, #4]            ; ref bytes  4..7
    ldr     r10, [r0, #8]           ; src bytes  8..11
    ldr     r11, [r0, #12]          ; src bytes 12..15

    usada8  r4, r6, r8, r4          ; total += sad(bytes 0..3)
    usad8   r8, r7, r9              ; side = sad(bytes 4..7)

    ldr     r12, [r2, #8]           ; ref bytes  8..11
    ldr     lr, [r2, #12]           ; ref bytes 12..15

    add     r0, r0, r1              ; src_ptr += src_stride
    add     r2, r2, r3              ; ref_ptr += ref_stride

    usada8  r4, r10, r12, r4        ; total += sad(bytes 8..11)
    usada8  r8, r11, lr, r8         ; side  += sad(bytes 12..15)

    pld     [r0, r1, lsl #1]        ; hint: two strides past the new src row
    pld     [r2, r3, lsl #1]        ; hint: two strides past the new ref row

    subs    r5, r5, #1              ; one pass done; sets Z when none remain
    add     r4, r4, r8              ; total += side (leaves the flags alone)

    bne     sad16x16_row_pair

    mov     r0, r4                  ; hand the total back in r0
    pop     {r4-r12, pc}            ; restore registers and return

    ENDP
94
95    END                             ; marks the end of the assembly source
96
97