;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    ; Entry points made visible to the linker.
    EXPORT  |vp8_filter_block2d_first_pass_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_16x16_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_8x8_armv6|
    EXPORT  |vp8_filter_block2d_second_pass_armv6|
    EXPORT  |vp8_filter4_block2d_second_pass_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_only_armv6|
    EXPORT  |vp8_filter_block2d_second_pass_only_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code
;-------------------------------------
; r0    unsigned char *src_ptr
; r1    short         *output_ptr
; r2    unsigned int    src_pixels_per_line
; r3    unsigned int    output_width
; stack unsigned int    output_height
; stack const short    *vp8_filter
;-------------------------------------
; vp8_filter the input and put in the output array.  Apply the 6 tap FIR filter with
; the output being a 2 byte value and the input being a 1 byte value.
;
; Register use inside the routine:
;   r4, r5, r6  three words loaded from vp8_filter (two 16-bit taps per word)
;   r7          loop counter: output_height in the top halfword,
;               output_width / 2 in the low byte (two results per inner pass)
;   r8 - r11,lr source bytes, packed pixel pairs and the two accumulators
;   r12         byte step applied to r1 after every store (2 * output_width + 16)
;   [sp]        output address at which the current source row started
; Each result is rounded (+0x40), shifted right by 7 and saturated to 0..255
; before being stored as a halfword.
; Saves/restores r4 - r11 and lr; r0 - r3, r12 and the flags are overwritten.
|vp8_filter_block2d_first_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed

    ldr     r11, [sp, #40]                  ; vp8_filter address
    ldr     r7, [sp, #36]                   ; output height

    sub     r2, r2, r3                      ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ;  r2 - width to the input pointer

    mov     r3, r3, lsl #1                  ; multiply width by 2 because using shorts
    add     r12, r3, #16                    ; square off the output
    sub     sp, sp, #4                      ; one word of scratch for the dst pointer

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    str     r1, [sp]                        ; push destination to stack
    mov     r7, r7, lsl #16                 ; height is top part of counter

; six tap filter
|height_loop_1st_6|
    ldrb    r8, [r0, #-2]                   ; load source data
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2              ; construct loop counter

|width_loop_1st_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                      ; apply the filter
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10
    smuad   r8, r8, r4
    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11

    smlad   lr, r10, r5, lr
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1

    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10

    smlad   lr, r9, r6, lr
    smlad   r11, r10, r6, r8

    ands    r10, r7, #0xff                  ; test loop counter
                                            ; (flags stay live until the bne below)

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ldrneb  r8, [r0, #-2]                   ; load data for next loop
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12                   ; result is transposed and stored, which
                                            ; will make second pass filtering easier.
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_6

    ldr     r1, [sp]                        ; load and update dst address
    subs    r7, r7, #0x10000                ; one row done; Z set when height hits 0
    add     r0, r0, r2                      ; move to next input line

    add     r1, r1, #2                      ; move over to next column
    str     r1, [sp]

    bne     height_loop_1st_6

    add     sp, sp, #4
    ldmia   sp!, {r4 - r11, pc}

    ENDP

; --------------------------
; 16x16 version
; -----------------------------
; Same arguments and same filtering sequence as
; vp8_filter_block2d_first_pass_armv6 (r0 src_ptr, r1 output_ptr,
; r2 src_pixels_per_line, r3 output_width, stack output_height, stack vp8_filter),
; with a pld of the following source row issued on entry and after every row.
; Saves/restores r4 - r11 and lr; r0 - r3, r12 and the flags are overwritten.
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed

    ldr     r11, [sp, #40]                  ; vp8_filter address
    ldr     r7, [sp, #36]                   ; output height

    add     r4, r2, #18                     ; preload next row
    pld     [r0, r4]

    sub     r2, r2, r3                      ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ;  r2 - width to the input pointer

    mov     r3, r3, lsl #1                  ; multiply width by 2 because using shorts
    add     r12, r3, #16                    ; square off the output
    sub     sp, sp, #4                      ; one word of scratch for the dst pointer

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    str     r1, [sp]                        ; push destination to stack
    mov     r7, r7, lsl #16                 ; height is top part of counter

; six tap filter
|height_loop_1st_16_6|
    ldrb    r8, [r0, #-2]                   ; load source data
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2              ; construct loop counter

|width_loop_1st_16_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                      ; apply the filter
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10
    smuad   r8, r8, r4
    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11

    smlad   lr, r10, r5, lr
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1

    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10

    smlad   lr, r9, r6, lr
    smlad   r11, r10, r6, r8

    ands    r10, r7, #0xff                  ; test loop counter
                                            ; (flags stay live until the bne below)

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ldrneb  r8, [r0, #-2]                   ; load data for next loop
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12                   ; result is transposed and stored, which
                                            ; will make second pass filtering easier.
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_16_6

    ldr     r1, [sp]                        ; load and update dst address
    subs    r7, r7, #0x10000                ; one row done; Z set when height hits 0
    add     r0, r0, r2                      ; move to next input line

    add     r11, r2, #34                    ; adding back block width(=16)
    pld     [r0, r11]                       ; preload next row

    add     r1, r1, #2                      ; move over to next column
    str     r1, [sp]

    bne     height_loop_1st_16_6

    add     sp, sp, #4
    ldmia   sp!, {r4 - r11, pc}

    ENDP

; --------------------------
; 8x8 version
; -----------------------------
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
    ; Arguments: r0 src_ptr, r1 output_ptr, r2 src_pixels_per_line,
    ; r3 output_width, [sp, #36] output_height and [sp, #40] vp8_filter
    ; (offsets as seen after the stmdb below).
    ; Applies the six-tap filter held in the three words at vp8_filter to
    ; source bytes and stores each rounded, saturated (0..255) result as a
    ; halfword, stepping the output pointer by 2 * output_width + 16 bytes
    ; after every store.  A pld of the following source row is issued on
    ; entry and after every row.
    ; r7 is the loop counter: height in the top halfword, width / 2 in the
    ; low byte (each inner pass produces two results).
    ; Saves/restores r4 - r11 and lr; r0 - r3, r12 and the flags are overwritten.
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed

    ldr     r11, [sp, #40]                  ; vp8_filter address
    ldr     r7, [sp, #36]                   ; output height

    add     r4, r2, #10                     ; preload next row
    pld     [r0, r4]

    sub     r2, r2, r3                      ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ;  r2 - width to the input pointer

    mov     r3, r3, lsl #1                  ; multiply width by 2 because using shorts
    add     r12, r3, #16                    ; square off the output
    sub     sp, sp, #4                      ; one word of scratch for the dst pointer

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    str     r1, [sp]                        ; push destination to stack
    mov     r7, r7, lsl #16                 ; height is top part of counter

; six tap filter
|height_loop_1st_8_6|
    ldrb    r8, [r0, #-2]                   ; load source data
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2              ; construct loop counter

|width_loop_1st_8_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16             ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16            ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                      ; apply the filter
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10
    smuad   r8, r8, r4
    pkhbt   r11, r11, r9, lsl #16           ; r9 | r11

    smlad   lr, r10, r5, lr
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1

    pkhbt   r9, r9, r10, lsl #16            ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16          ; r11 | r10

    smlad   lr, r9, r6, lr
    smlad   r11, r10, r6, r8

    ands    r10, r7, #0xff                  ; test loop counter
                                            ; (flags stay live until the bne below)

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ldrneb  r8, [r0, #-2]                   ; load data for next loop
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12                   ; result is transposed and stored, which
                                            ; will make second pass filtering easier.
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_8_6

    ldr     r1, [sp]                        ; load and update dst address
    subs    r7, r7, #0x10000                ; one row done; Z set when height hits 0
    add     r0, r0, r2                      ; move to next input line

    add     r11, r2, #18                    ; adding back block width(=8)
    pld     [r0, r11]                       ; preload next row

    add     r1, r1, #2                      ; move over to next column
    str     r1, [sp]

    bne     height_loop_1st_8_6

    add     sp, sp, #4
    ldmia   sp!, {r4 - r11, pc}

    ENDP

;---------------------------------
; r0    short         *src_ptr,
; r1    unsigned char
;                     *output_ptr,
; r2    unsigned int output_pitch,
; r3    unsigned int cnt,
; stack const short *vp8_filter
;---------------------------------
; Second filtering pass: reads 16-bit values from src_ptr, applies the taps
; held in the three words at vp8_filter, and writes one byte per result.
;
; Register use inside the routine:
;   r4, r5, r6  the three filter words as loaded
;   r12, r11    the same taps repacked: r12 = r5[15:0] | r4[31:16],
;               r11 = r6[15:0] | r5[31:16] (used with the smladx forms)
;   r7          loop counter: cnt in the top halfword, cnt / 2 in the low
;               byte (each inner pass produces two results)
;   lr, r10     the two results of an inner pass
;   [sp]        output address at which the current outer pass started
; Each result is rounded (+0x40), shifted right by 7 and saturated to 0..255.
; Saves/restores r4 - r11 and lr; r0, r1, r12 and the flags are overwritten.
|vp8_filter_block2d_second_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers = 36 bytes pushed

    ldr     r11, [sp, #36]                  ; vp8_filter address
    sub     sp, sp, #4                      ; one word of scratch for the dst pointer
    mov     r7, r3, lsl #16                 ; height is top part of counter
    str     r1, [sp]                        ; push destination to stack

    ldr     r4, [r11]                       ; load up packed filter coefficients
    ldr     r5, [r11, #4]
    ldr     r6, [r11, #8]

    pkhbt   r12, r5, r4                     ; pack the filter differently
    pkhbt   r11, r6, r5

    sub     r0, r0, #4                      ; offset input buffer

|height_loop_2nd|
    ldr     r8, [r0]                        ; load the data
    ldr     r9, [r0, #4]
    orr     r7, r7, r3, lsr #1              ; loop counter

|width_loop_2nd|
    smuad   lr, r4, r8                      ; apply filter
    sub     r7, r7, #1
    smulbt  r8, r4, r8

    ldr     r10, [r0, #8]

    smlad   lr, r5, r9, lr
    smladx  r8, r12, r9, r8

    ldrh    r9, [r0, #12]

    smlad   lr, r6, r10, lr
    smladx  r8, r11, r10, r8

    add     r0, r0, #4
    smlatb  r10, r6, r9, r8

    add     lr, lr, #0x40                   ; round_shift_and_clamp
    ands    r8, r7, #0xff                   ; test loop counter
                                            ; (flags stay live until the bne below)
    usat    lr, #8, lr, asr #7
    add     r10, r10, #0x40
    strb    lr, [r1], r2                    ; the result is transposed back and stored
    usat    r10, #8, r10, asr #7

    ldrne   r8, [r0]                        ; load data for next loop
    ldrne   r9, [r0, #4]
    strb    r10, [r1], r2

    bne     width_loop_2nd

    ldr     r1, [sp]                        ; update dst for next loop
    subs    r7, r7, #0x10000                ; one outer pass done; Z set at 0
    add     r0, r0, #16                     ; update src for next loop
    add     r1, r1, #1
    str     r1, [sp]

    bne     height_loop_2nd

    add     sp, sp, #4
    ldmia   sp!, {r4 - r11, pc}

    ENDP

;---------------------------------
; r0    short         *src_ptr,
; r1
;---------------------------------
; vp8_filter4_block2d_second_pass_armv6
;
; Second (vertical) filter pass over 16-bit intermediate data, using the
; reduced multiply sequence (two smlad/smladx per output instead of three).
; Two output bytes are produced per inner iteration and written output_pitch
; bytes apart, so the stored result is transposed with respect to the source.
;
; r0    short         *src_ptr,
; r1    unsigned char *output_ptr,
; r2    unsigned int   output_pitch,
; r3    unsigned int   cnt,
; stack const short   *vp8_filter
;
; Register use inside the loops:
;   r4        rounding constant 0x40
;   r5, r6    the two filter accumulators
;   r7        packed counter: bits 31..16 = lines left, bits 7..0 = pairs left
;   r8 - r10  source data (two 16-bit samples per register)
;   r11, r12  re-packed filter coefficients
;   lr        output_ptr + cnt (used to derive the next output column)
;---------------------------------
|vp8_filter4_block2d_second_pass_armv6| PROC
    stmdb       sp!, {r4 - r11, lr}         ; 9 registers pushed = 36 bytes

    ldr         r11, [sp, #36]              ; vp8_filter address (first stack argument)
    mov         r7, r3, lsl #16             ; outer count lives in the top half of r7

    ldr         r4, [r11]                   ; load up packed filter coefficients
    add         lr, r1, r3                  ; lr = output_ptr + cnt
    ldr         r5, [r11, #4]
    ldr         r6, [r11, #8]

    pkhbt       r12, r5, r4                 ; r12 = top half of r4 | bottom half of r5
    pkhbt       r11, r6, r5                 ; r11 = top half of r5 | bottom half of r6
    mov         r4, #0x40                   ; rounding factor, used as the smlad{x} accumulator

|height_loop_2nd_4|
    ldrd        r8, r9, [r0, #-4]           ; load the first four samples of this line
    orr         r7, r7, r3, lsr #1          ; inner count (cnt / 2) in the low byte of r7

|width_loop_2nd_4|
    ldr         r10, [r0, #4]!              ; next two samples, r0 advances by 4 bytes
    smladx      r6, r9, r12, r4             ; apply filter (second output of the pair)
    pkhbt       r8, r9, r8                  ; r8 = top half of r8 | bottom half of r9
    smlad       r5, r8, r12, r4             ; apply filter (first output of the pair)
    pkhbt       r8, r10, r9                 ; r8 = top half of r9 | bottom half of r10
    smladx      r6, r10, r11, r6
    sub         r7, r7, #1                  ; no S suffix: flags are set by the tst below
    smlad       r5, r8, r11, r5

    mov         r8, r9                      ; shift the data for the next loop
    mov         r9, r10

    usat        r6, #8, r6, asr #7          ; shift and clamp to 0..255
    usat        r5, #8, r5, asr #7

    strb        r5, [r1], r2                ; the result is transposed back and stored
    tst         r7, #0xff                   ; any pairs left on this line?
    strb        r6, [r1], r2

    bne         width_loop_2nd_4

    subs        r7, r7, #0x10000            ; one line done
    add         r0, r0, #16                 ; update src for next loop
    sub         r1, lr, r7, lsr #16         ; update dst for next loop:
                                            ; (output_ptr + cnt) - lines left = next column

    bne         height_loop_2nd_4

    ldmia       sp!, {r4 - r11, pc}

    ENDP

;------------------------------------
; vp8_filter_block2d_first_pass_only_armv6
;
; Horizontal six tap filter applied directly to 8-bit source pixels, writing
; 8-bit results. Two output pixels are produced per inner iteration, and cnt
; is used both as the pixels-per-row count and as the row count.
;
; r0    unsigned char *src_ptr
; r1    unsigned char *output_ptr,
; r2    unsigned int   src_pixels_per_line
; r3    unsigned int   cnt,
; stack unsigned int   output_pitch,
; stack const short   *vp8_filter
;
; Local stack frame (8 bytes):
;   [sp]      output_pitch - cnt          (dst adjustment at the end of a row)
;   [sp, #4]  src_pixels_per_line - cnt   (src adjustment at the end of a row)
;------------------------------------
|vp8_filter_block2d_first_pass_only_armv6| PROC
    stmdb       sp!, {r4 - r11, lr}         ; 9 registers pushed = 36 bytes

    add         r7, r2, r3                  ; preload next row
    add         r7, r7, #2
    pld         [r0, r7]

    ldr         r4, [sp, #36]               ; output pitch
    ldr         r11, [sp, #40]              ; HFilter address
    sub         sp, sp, #8                  ; room for the two row adjustments

    mov         r7, r3                      ; r7 = row counter
    sub         r2, r2, r3                  ; inside loop increments input array,
                                            ; so the height loop only needs to add
                                            ; r2 - width to the input pointer

    sub         r4, r4, r3                  ; same reasoning for the output pointer
    str         r4, [sp]                    ; save modified output pitch
    str         r2, [sp, #4]                ; save modified source stride

    mov         r2, #0x40                   ; r2 is reused as the rounding constant

    ldr         r4, [r11]                   ; load up packed filter coefficients
    ldr         r5, [r11, #4]
    ldr         r6, [r11, #8]

; six tap filter
|height_loop_1st_only_6|
    ldrb        r8, [r0, #-2]               ; load data
    ldrb        r9, [r0, #-1]
    ldrb        r10, [r0], #2

    mov         r12, r3, lsr #1             ; loop counter (two pixels per iteration)

|width_loop_1st_only_6|
    ldrb        r11, [r0, #-1]

    pkhbt       lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt       r8, r9, r10, lsl #16        ; r10 | r9

    ldrb        r9, [r0]

    ; The rounding constant (r2 = 0x40) is folded into the first multiply of
    ; each accumulator, so no separate add is needed before the usat below.
    smlad       lr, lr, r4, r2
    pkhbt       r10, r10, r11, lsl #16      ; r11 | r10
    smlad       r8, r8, r4, r2
    pkhbt       r11, r11, r9, lsl #16       ; r9 | r11

    smlad       lr, r10, r5, lr
    ldrb        r10, [r0, #1]
    smlad       r8, r11, r5, r8
    ldrb        r11, [r0, #2]

    subs        r12, r12, #1                ; flags stay live until the bne below

    pkhbt       r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt       r10, r10, r11, lsl #16      ; r11 | r10

    smlad       lr, r9, r6, lr
    smlad       r10, r10, r6, r8

    ldrneb      r8, [r0, #-2]               ; load data for next loop
    usat        lr, #8, lr, asr #7          ; shift and clamp to 0..255
    strb        lr, [r1], #1                ; store the result
    usat        r10, #8, r10, asr #7

    ldrneb      r9, [r0, #-1]
    strb        r10, [r1], #1
    ldrneb      r10, [r0], #2

    bne         width_loop_1st_only_6

    ldr         lr, [sp]                    ; load back modified output pitch
    ldr         r12, [sp, #4]               ; load back modified source stride
    subs        r7, r7, #1                  ; flags stay live until the bne below
    add         r0, r0, r12                 ; update src for next loop

    add         r11, r12, r3                ; preload next row
    add         r11, r11, #2
    pld         [r0, r11]

    add         r1, r1, lr                  ; update dst for next loop

    bne         height_loop_1st_only_6

    add         sp, sp, #8                  ; drop the local frame
    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_filter_block2d_first_pass_only_armv6|


;------------------------------------
; vp8_filter_block2d_second_pass_only_armv6
;
; Vertical six tap filter applied directly to 8-bit source pixels, writing
; 8-bit results. The inner loop walks down one column, producing two output
; pixels per iteration; the outer loop then moves one column to the right.
;
; r0    unsigned char *src_ptr,
; r1    unsigned char *output_ptr,
; r2    unsigned int   src_pixels_per_line
; r3    unsigned int   cnt,
; stack unsigned int   output_pitch,
; stack const short   *vp8_filter
;
; Local stack frame (8 bytes):
;   [sp]      source address of the top of the current column
;   [sp, #4]  destination address of the top of the current column
;------------------------------------
|vp8_filter_block2d_second_pass_only_armv6| PROC
    stmdb       sp!, {r4 - r11, lr}         ; 9 registers pushed = 36 bytes

    ldr         r11, [sp, #40]              ; VFilter address
    ldr         r12, [sp, #36]              ; output pitch

    mov         r7, r3, lsl #16             ; column count lives in the top half of r7
    sub         r0, r0, r2, lsl #1          ; need 6 elements for filtering, 2 before, 3 after

    sub         sp, sp, #8                  ; room for the saved src / dst column pointers

    ldr         r4, [r11]                   ; load up packed filter coefficients
    ldr         r5, [r11, #4]
    ldr         r6, [r11, #8]

    str         r0, [sp]                    ; save r0 to stack
    str         r1, [sp, #4]                ; save dst to stack

; six tap filter
|width_loop_2nd_only_6|
    ldrb        r8, [r0], r2                ; load data
    orr         r7, r7, r3                  ; loop counter (row count in the low byte)
    ldrb        r9, [r0], r2
    ldrb        r10, [r0], r2

|height_loop_2nd_only_6|
    ; Filter one whole column in this inner loop, then move to the next column.
    ldrb        r11, [r0], r2

    pkhbt       lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt       r8, r9, r10, lsl #16        ; r10 | r9

    ldrb        r9, [r0], r2

    smuad       lr, lr, r4
    pkhbt       r10, r10, r11, lsl #16      ; r11 | r10
    smuad       r8, r8, r4
    pkhbt       r11, r11, r9, lsl #16       ; r9 | r11

    smlad       lr, r10, r5, lr
    ldrb        r10, [r0], r2
    smlad       r8, r11, r5, r8
    ldrb        r11, [r0]

    sub         r7, r7, #2                  ; two output rows per iteration
    sub         r0, r0, r2, lsl #2          ; step back four rows for the next iteration

    pkhbt       r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt       r10, r10, r11, lsl #16      ; r11 | r10

    smlad       lr, r9, r6, lr
    smlad       r10, r10, r6, r8

    ands        r9, r7, #0xff               ; rows left in this column? (r9 is free here);
                                            ; flags stay live until the bne below

    add         lr, lr, #0x40               ; round_shift_and_clamp
    ldrneb      r8, [r0], r2                ; load data for next loop
    usat        lr, #8, lr, asr #7
    add         r10, r10, #0x40
    strb        lr, [r1], r12               ; store the result for the column
    usat        r10, #8, r10, asr #7

    ldrneb      r9, [r0], r2
    strb        r10, [r1], r12
    ldrneb      r10, [r0], r2

    bne         height_loop_2nd_only_6

    ldr         r0, [sp]                    ; back to the top of the current column
    ldr         r1, [sp, #4]
    subs        r7, r7, #0x10000            ; one column done; flags stay live until the bne below
    add         r0, r0, #1                  ; move to filter next column
    str         r0, [sp]
    add         r1, r1, #1
    str         r1, [sp, #4]

    bne         width_loop_2nd_only_6

    add         sp, sp, #8                  ; drop the local frame

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_filter_block2d_second_pass_only_armv6|

    END