;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Module setup for the routine defined in this file:
;   EXPORT     makes the entry point visible to the linker
;   ARM        selects the ARM (not Thumb) instruction encoding
;   REQUIRE8   declares that this code needs an 8-byte aligned stack
;   PRESERVE8  declares that this code keeps the stack 8-byte aligned
;   AREA       opens the read-only code section; the ALIGN attribute is a
;              power of two, so a value of 2 requests 4-byte alignment

    EXPORT  |vpx_variance_halfpixvar16x16_v_media|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vpx_variance_halfpixvar16x16_v_media
;
; Compares a 16x16 reference block against a prediction formed by averaging
; each source pixel with the pixel directly below it (vertical half-pixel
; position, rounded up):
;
;     pred[y][x] = (src[y][x] + src[y + 1][x] + 1) >> 1
;     diff       = pred[y][x] - ref[y][x]
;     sum        = total of diff      over all 256 pixels
;     sse        = total of diff^2    over all 256 pixels
;
; Arguments:
; r0    unsigned char *src_ptr
; r1    int source_stride
; r2    unsigned char *ref_ptr
; r3    int  recon_stride
; stack unsigned int *sse
;
; Result:
; r0    sse - ((sum * sum) >> 8)
; *sse  receives the sum of squared differences
;
; Register use inside the routine:
; r4-r7   scratch (pixels, differences, partial sums)
; r8      running sum of differences
; r9      pointer to the source row below the current one
; r10     0x80808080 (rounding/bias constant for the average)
; r11     running sum of squared differences
; r12     rows remaining
; lr      constant zero
; r4-r12 and lr are pushed on entry and restored on exit.
;
; Notes:
; - 17 source rows of 16 bytes are read (the last iteration loads the row
;   below row 15); 16 reference rows of 16 bytes are read.
; - Pixels are fetched four at a time with word loads directly from the
;   caller's byte pointers; nothing here aligns them, so this presumably
;   relies on unaligned word loads being permitted on the target -- confirm.
; - Uses the ARMv6 SIMD instructions uhsub8, usub8, sel, usad8, uxtb16, smlad.
;-----------------------------------------------------------------------------
|vpx_variance_halfpixvar16x16_v_media| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers = 40 bytes pushed

    pld     [r0, r1, lsl #0]    ; prefetch hint: source row below the first
    pld     [r2, r3, lsl #0]    ; prefetch hint: second reference row

    mov     r8, #0              ; sum = 0
    ldr     r10, c80808080      ; per-byte 0x80 bias used by the averaging
    mov     r11, #0             ; sse = 0
    mov     r12, #16            ; row counter = block height
    mov     lr, #0              ; constant zero for sel / usad8

; One iteration handles one 16-pixel row as four groups of 4 pixels.
; Per group:
;   mvn + uhsub8 + eor : per byte, (a - ~b) >> 1 is (a + b - 255) >> 1;
;                        flipping bit 7 adds 128, giving (a + b + 1) >> 1.
;   usub8 + sel        : usub8 sets a GE flag per byte where no borrow
;                        occurred; sel keeps those bytes and zeroes the rest,
;                        so one register holds the positive differences and
;                        the other the magnitudes of the negative ones.
;   usad8 with zero    : adds up the four bytes of a register.
;   orr                : merges both halves into |diff| per byte (at most one
;                        of the two is non-zero in any byte lane).
;   uxtb16 + smlad     : widen bytes to halfwords and accumulate squares.
; The second smlad of each group is issued after the loads of the next group;
; the instruction order below is kept exactly as scheduled.
loop
    add     r9, r0, r1          ; r9 = source row below the current one

    ; ---- pixels 0..3 ----
    ldr     r4, [r0, #0]        ; 4 source pixels, current row
    ldr     r6, [r9, #0]        ; 4 source pixels, row below
    ldr     r5, [r2, #0]        ; 4 reference pixels

    ; rounded average of the two source rows
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    usub8   r6, r4, r5          ; pred - ref, GE set where pred >= ref
    pld     [r0, r1, lsl #1]    ; prefetch hint: source, two rows ahead
    sel     r7, r6, lr          ; keep positive differences, zero elsewhere
    usub8   r6, r5, r4          ; ref - pred, GE set where ref >= pred
    pld     [r2, r3, lsl #1]    ; prefetch hint: reference, two rows ahead
    sel     r6, r6, lr          ; keep negative magnitudes, zero elsewhere

    ; partial sums of this group
    usad8   r4, r7, lr          ; total of positive differences
    usad8   r5, r6, lr          ; total of negative magnitudes
    orr     r6, r6, r7          ; |diff| for all 4 pixels

    ; fold into the running sum (flags are not needed here, so plain
    ; add/sub is used, matching the other three groups)
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6              ; |diff| of bytes 0 and 2 as halfwords
    uxtb16  r7, r6, ror #8      ; |diff| of bytes 1 and 3 as halfwords
    smlad   r11, r5, r5, r11    ; sse += squares of bytes 0 and 2

    ; ---- pixels 4..7 ----
    ldr     r4, [r0, #4]        ; 4 source pixels, current row
    ldr     r6, [r9, #4]        ; 4 source pixels, row below
    ldr     r5, [r2, #4]        ; 4 reference pixels

    ; rounded average of the two source rows
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11    ; sse += squares of bytes 1 and 3 (previous group)

    usub8   r6, r4, r5          ; pred - ref
    sel     r7, r6, lr          ; keep positive differences
    usub8   r6, r5, r4          ; ref - pred
    sel     r6, r6, lr          ; keep negative magnitudes

    ; partial sums of this group
    usad8   r4, r7, lr          ; total of positive differences
    usad8   r5, r6, lr          ; total of negative magnitudes
    orr     r6, r6, r7          ; |diff| for all 4 pixels

    ; fold into the running sum
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6              ; |diff| of bytes 0 and 2 as halfwords
    uxtb16  r7, r6, ror #8      ; |diff| of bytes 1 and 3 as halfwords
    smlad   r11, r5, r5, r11    ; sse += squares of bytes 0 and 2

    ; ---- pixels 8..11 ----
    ldr     r4, [r0, #8]        ; 4 source pixels, current row
    ldr     r6, [r9, #8]        ; 4 source pixels, row below
    ldr     r5, [r2, #8]        ; 4 reference pixels

    ; rounded average of the two source rows
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11    ; sse += squares of bytes 1 and 3 (previous group)

    usub8   r6, r4, r5          ; pred - ref
    sel     r7, r6, lr          ; keep positive differences
    usub8   r6, r5, r4          ; ref - pred
    sel     r6, r6, lr          ; keep negative magnitudes

    ; partial sums of this group
    usad8   r4, r7, lr          ; total of positive differences
    usad8   r5, r6, lr          ; total of negative magnitudes
    orr     r6, r6, r7          ; |diff| for all 4 pixels

    ; fold into the running sum
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6              ; |diff| of bytes 0 and 2 as halfwords
    uxtb16  r7, r6, ror #8      ; |diff| of bytes 1 and 3 as halfwords
    smlad   r11, r5, r5, r11    ; sse += squares of bytes 0 and 2

    ; ---- pixels 12..15 ----
    ldr     r4, [r0, #12]       ; 4 source pixels, current row
    ldr     r6, [r9, #12]       ; 4 source pixels, row below
    ldr     r5, [r2, #12]       ; 4 reference pixels

    ; rounded average of the two source rows
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11    ; sse += squares of bytes 1 and 3 (previous group)

    usub8   r6, r4, r5          ; pred - ref
    add     r0, r0, r1          ; advance src_ptr to the next row
    sel     r7, r6, lr          ; keep positive differences
    usub8   r6, r5, r4          ; ref - pred
    add     r2, r2, r3          ; advance ref_ptr to the next row
    sel     r6, r6, lr          ; keep negative magnitudes

    ; partial sums of this group
    usad8   r4, r7, lr          ; total of positive differences
    usad8   r5, r6, lr          ; total of negative magnitudes
    orr     r6, r6, r7          ; |diff| for all 4 pixels

    ; fold into the running sum
    add     r8, r8, r4
    sub     r8, r8, r5

    ; squared differences
    uxtb16  r5, r6              ; |diff| of bytes 0 and 2 as halfwords
    uxtb16  r7, r6, ror #8      ; |diff| of bytes 1 and 3 as halfwords
    smlad   r11, r5, r5, r11    ; sse += squares of bytes 0 and 2
    smlad   r11, r7, r7, r11    ; sse += squares of bytes 1 and 3


    subs    r12, r12, #1        ; one row done; Z set after the 16th

    bne     loop

    ; results
    ldr     r6, [sp, #40]       ; sse pointer: first stack argument, above the 40 bytes pushed
    mul     r0, r8, r8          ; sum * sum
    str     r11, [r6]           ; *sse = sse
    sub     r0, r11, r0, lsr #8 ; return sse - ((sum * sum) >> 8)

    ldmfd   sp!, {r4-r12, pc}   ; restore registers and return

    ENDP

; Literal loaded into r10 above: 0x80 in every byte lane.
c80808080
    DCD     0x80808080

    END
