;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_mse16x16_armv6|

    ARM

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_mse16x16_armv6
;
; Accumulates the sum of squared differences between a 16x16 block of
; source pixels and a 16x16 block of reference pixels, stores it through
; the sse pointer and also returns it in r0.
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int            source_stride
;   r2      unsigned char *ref_ptr
;   r3      int            recon_stride
;   stack   unsigned int  *sse
; Out:
;   r0      sum of squared differences (same value written to *sse)
;
; Register roles inside the function:
;   r4      running sse accumulator
;   r5, r6  current 4 src / 4 ref pixels (r6 doubles as a temporary)
;   r7-r9   temporaries for the per-byte absolute differences
;   r12     remaining row count
;   lr      constant zero (second operand of sel)
; r4-r9 and lr are saved on entry and restored on exit.
;
; note: Based on vp8_variance16x16_armv6. In this function, sum is never
;       used, so that part of the calculation (the usad8 partial sums)
;       is not performed.
;-----------------------------------------------------------------------

|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}

    pld     [r0, r1, lsl #0]    ; prefetch the second src row
    pld     [r2, r3, lsl #0]    ; prefetch the second ref row

    mov     r12, #16            ; loop counter = 16 (block height)
    mov     r4, #0              ; sse = 0
    mov     lr, #0              ; constant zero; never modified in the loop

loop
    ; ---- 1st 4 pixels ----
    ldr     r5, [r0, #0x0]      ; load 4 src pixels
    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

    ; usub8 sets the per-byte GE flags; sel then keeps only the bytes
    ; whose subtraction did not borrow and zeroes the others.
    usub8   r8, r5, r6          ; src - ref, per byte
    pld     [r0, r1, lsl #1]    ; prefetch src two rows ahead
    sel     r7, r8, lr          ; bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; ref - src, per byte
    pld     [r2, r3, lsl #1]    ; prefetch ref two rows ahead
    sel     r8, r9, lr          ; bytes where ref >= src, else 0
    orr     r8, r8, r7          ; |src - ref| for all 4 pixels

    ldr     r5, [r0, #0x4]      ; load next 4 src pixels early

    uxtb16  r6, r8              ; pixels 0 and 2 -> halfwords
    uxtb16  r7, r8, ror #8      ; pixels 1 and 3 -> halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; ---- 2nd 4 pixels ----
    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r8, r5, r6          ; src - ref, per byte
    sel     r7, r8, lr          ; bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; ref - src, per byte
    sel     r8, r9, lr          ; bytes where ref >= src, else 0
    orr     r8, r8, r7          ; |src - ref| for all 4 pixels

    ldr     r5, [r0, #0x8]      ; load next 4 src pixels early

    uxtb16  r6, r8              ; pixels 0 and 2 -> halfwords
    uxtb16  r7, r8, ror #8      ; pixels 1 and 3 -> halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; ---- 3rd 4 pixels ----
    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r8, r5, r6          ; src - ref, per byte
    sel     r7, r8, lr          ; bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; ref - src, per byte
    sel     r8, r9, lr          ; bytes where ref >= src, else 0
    orr     r8, r8, r7          ; |src - ref| for all 4 pixels

    ldr     r5, [r0, #0xc]      ; load next 4 src pixels early

    uxtb16  r6, r8              ; pixels 0 and 2 -> halfwords
    uxtb16  r7, r8, ror #8      ; pixels 1 and 3 -> halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; ---- 4th 4 pixels ----
    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r8, r5, r6          ; src - ref, per byte
    add     r0, r0, r1          ; advance src_ptr to the next row
    sel     r7, r8, lr          ; bytes where src >= ref, else 0
    usub8   r9, r6, r5          ; ref - src, per byte
    add     r2, r2, r3          ; advance ref_ptr to the next row
    sel     r8, r9, lr          ; bytes where ref >= src, else 0
    orr     r8, r8, r7          ; |src - ref| for all 4 pixels

    subs    r12, r12, #1        ; one row done; Z flag feeds the bne below

    uxtb16  r6, r8              ; pixels 0 and 2 -> halfwords
    uxtb16  r7, r8, ror #8      ; pixels 1 and 3 -> halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    bne     loop

    ; return stuff
    ; 7 registers were pushed (28 bytes), so the first stack argument
    ; is at [sp, #28].
    ldr     r1, [sp, #28]       ; get address of sse
    mov     r0, r4              ; return sse
    str     r4, [r1]            ; store sse

    pop     {r4-r9, pc}

    ENDP

    END