;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_mse16x16_armv6|

    ARM

    AREA ||.text||, CODE, READONLY, ALIGN=2

; r0    unsigned char *src_ptr
; r1    int source_stride
; r2    unsigned char *ref_ptr
; r3    int  recon_stride
; stack unsigned int *sse
;
;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
;      So, we can remove this part of calculation.

;-----------------------------------------------------------------------
; vp8_mse16x16_armv6
;
; Accumulates the sum of squared byte differences between a 16x16 block
; at src_ptr and a 16x16 block at ref_ptr (16 rows, four 4-byte groups
; per row). No sum of differences is computed; only the squared-error
; accumulator is kept.
;
; In:    r0 = src_ptr, r1 = source_stride (bytes added to r0 per row)
;        r2 = ref_ptr, r3 = recon_stride  (bytes added to r2 per row)
;        first stack argument = unsigned int *sse
; Out:   r0 = accumulated sse; the same value is stored to *sse
; Regs:  r4  = sse accumulator
;        r12 = remaining row count
;        lr  = constant zero (operand for sel)
;        r5-r9 = scratch
;        r4-r9 and lr are saved on entry and restored on exit.
; Note:  pixels are fetched with word loads (ldr) at offsets 0/4/8/12 of
;        each row - assumes such loads are permitted for the addresses
;        callers pass (unaligned access enabled or 4-byte aligned rows);
;        TODO confirm against callers.
;-----------------------------------------------------------------------
|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}         ; 7 registers = 28 bytes (see sse load below)

    pld     [r0, r1, lsl #0]    ; preload src one row ahead
    pld     [r2, r3, lsl #0]    ; preload ref one row ahead

    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     r4, #0              ; initialize sse = 0
    mov     lr, #0              ; constant zero, loop-invariant

loop
    ; 1st 4 pixels
    ldr     r5, [r0, #0x0]      ; load 4 src pixels
    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

    ; usub8 sets the per-byte GE flags; the following sel must consume
    ; them before the next usub8 (pld does not touch them).
    usub8   r8, r5, r6          ; calculate difference
    pld     [r0, r1, lsl #1]    ; preload src two rows ahead
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    pld     [r2, r3, lsl #1]    ; preload ref two rows ahead
    sel     r8, r9, lr          ; select bytes with negative difference
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x4]      ; load 4 src pixels (for the 2nd group)

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 2nd 4 pixels
    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x8]      ; load 4 src pixels (for the 3rd group)

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 3rd 4 pixels
    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6          ; calculate difference
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    sel     r8, r9, lr          ; select bytes with negative difference
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0xc]      ; load 4 src pixels (for the 4th group)

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 4th 4 pixels
    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r8, lr          ; select bytes with positive difference
    usub8   r9, r6, r5          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r8, r9, lr          ; select bytes with negative difference
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    subs    r12, r12, #1        ; next row; Z flag feeds the bne below

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    bne     loop

    ; return stuff
    ldr     r1, [sp, #28]       ; get address of sse (above the 28 bytes pushed)
    mov     r0, r4              ; return sse
    str     r4, [r1]            ; store sse

    pop     {r4-r9, pc}

    ENDP

    END