;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8_loop_filter_horizontal_edge_armv6|
    EXPORT |vp8_mbloop_filter_horizontal_edge_armv6|
    EXPORT |vp8_loop_filter_vertical_edge_armv6|
    EXPORT |vp8_mbloop_filter_vertical_edge_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code

;-----------------------------------------------------------------------------
; TRANSPOSE_MATRIX
;   Transposes a 4x4 matrix of bytes held in four 32-bit registers.
;
;   In:    $a0..$a3  rows of the matrix; byte lane k of $aN is element (N, k)
;   Out:   $b0..$b3  columns of the matrix; byte lane k of $bN is element (k, N)
;   Clobb: $a0..$a3  (overwritten with intermediate values)
;
;   All eight registers must be distinct: the $b registers are written
;   before the $a registers have been read for the last time.
;-----------------------------------------------------------------------------
    MACRO
    TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
    ; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
    ; a0: 03 02 01 00
    ; a1: 13 12 11 10
    ; a2: 23 22 21 20
    ; a3: 33 32 31 30
    ;     b3 b2 b1 b0

    ; even byte lanes of each row, interleaved pairwise
    uxtb16      $b1, $a1                    ; xx 12 xx 10
    uxtb16      $b0, $a0                    ; xx 02 xx 00
    uxtb16      $b3, $a3                    ; xx 32 xx 30
    uxtb16      $b2, $a2                    ; xx 22 xx 20
    orr         $b1, $b0, $b1, lsl #8       ; 12 02 10 00
    orr         $b3, $b2, $b3, lsl #8       ; 32 22 30 20

    ; odd byte lanes of each row, interleaved pairwise (reuses the inputs)
    uxtb16      $a1, $a1, ror #8            ; xx 13 xx 11
    uxtb16      $a3, $a3, ror #8            ; xx 33 xx 31
    uxtb16      $a0, $a0, ror #8            ; xx 03 xx 01
    uxtb16      $a2, $a2, ror #8            ; xx 23 xx 21
    orr         $a0, $a0, $a1, lsl #8       ; 13 03 11 01
    orr         $a2, $a2, $a3, lsl #8       ; 33 23 31 21

    ; combine halfwords to form the four output columns
    pkhtb       $b2, $b3, $b1, asr #16      ; 32 22 12 02   -- p1
    pkhbt       $b0, $b1, $b3, lsl #16      ; 30 20 10 00   -- p3

    pkhtb       $b3, $a2, $a0, asr #16      ; 33 23 13 03   -- p0
    pkhbt       $b1, $a0, $a2, lsl #16      ; 31 21 11 01   -- p2
    MEND


; register aliases shared by the loop filter procedures
src         RN  r0
pstep       RN  r1
count       RN  r5

;r0    unsigned char *src_ptr,
;r1    int src_pixel_step,
;r2    const char *blimit,
;r3    const char *limit,
;stack const char *thresh,
;stack int  count

;-----------------------------------------------------------------------------
; vp8_loop_filter_horizontal_edge_armv6
;   Loop filter across a horizontal edge. src_ptr addresses the q0 row; the
;   eight rows p3..q3 lie at src_ptr - 4*pstep .. src_ptr + 3*pstep.
;   Each pass of the loop loads one 32-bit word (4 pixels) from each of the
;   eight rows and, where the filter mask is set, rewrites p1, p0, q0 and q1.
;   The loop runs 2 * count passes and is bottom-tested, so count must be >= 1.
;
;   In:    r0 = src_ptr, r1 = src_pixel_step, r2 = &blimit, r3 = &limit,
;          [sp] = &thresh, [sp, #4] = count   (on entry)
;   Regs:  r4 = blimit x4, r2 = limit x4, r3 = thresh x4 (byte replicated),
;          r5 = remaining passes, lr = filter mask, r6 = hev mask
;   Stack: r4-r11, lr saved (36 bytes) plus a 16-byte scratch area:
;          [sp] = qs0, [sp, #4] = ps0, [sp, #8] = qs1, [sp, #12] = ps1
;-----------------------------------------------------------------------------
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    stmdb       sp!, {r4 - r11, lr}         ; 9 registers pushed = 36 bytes

    sub         src, src, pstep, lsl #2     ; src -= 4 * pstep: start at the p3 row
    ldr         count, [sp, #40]            ; second stack argument: count
    ldr         r6, [sp, #36]               ; first stack argument: thresh address
    sub         sp, sp, #16                 ; create temp buffer (qs0, ps0, qs1, ps1)

    ldr         r9, [src], pstep            ; p3
    ldrb        r4, [r2]                    ; blimit
    ldr         r10, [src], pstep           ; p2
    ldrb        r2, [r3]                    ; limit
    ldr         r11, [src], pstep           ; p1
    orr         r4, r4, r4, lsl #8
    ldrb        r3, [r6]                    ; thresh
    orr         r2, r2, r2, lsl #8
    mov         count, count, lsl #1        ; 4 pixels per pass, so 2 * count passes
    orr         r4, r4, r4, lsl #16         ; r4 = blimit in all four byte lanes
    orr         r3, r3, r3, lsl #8
    orr         r2, r2, r2, lsl #16         ; r2 = limit in all four byte lanes
    orr         r3, r3, r3, lsl #16         ; r3 = thresh in all four byte lanes

|Hnext8|
    ; vp8_filter_mask() function
    ; calculate breakout conditions
    ; A byte lane of lr ends up non-zero if any difference exceeds its limit.
    ldr         r12, [src], pstep           ; p0

    uqsub8      r6, r9, r10                 ; p3 - p2
    uqsub8      r7, r10, r9                 ; p2 - p3
    uqsub8      r8, r10, r11                ; p2 - p1
    uqsub8      r10, r11, r10               ; p1 - p2

    orr         r6, r6, r7                  ; abs (p3-p2)
    orr         r8, r8, r10                 ; abs (p2-p1)
    uqsub8      lr, r6, r2                  ; compare to limit. lr: vp8_filter_mask
    uqsub8      r8, r8, r2                  ; compare to limit
    uqsub8      r6, r11, r12                ; p1 - p0
    orr         lr, lr, r8
    uqsub8      r7, r12, r11                ; p0 - p1
    ldr         r9, [src], pstep            ; q0
    ldr         r10, [src], pstep           ; q1
    orr         r6, r6, r7                  ; abs (p1-p0)
    uqsub8      r7, r6, r2                  ; compare to limit
    uqsub8      r8, r6, r3                  ; compare to thresh  -- save r8 for later
    orr         lr, lr, r7

    uqsub8      r6, r11, r10                ; p1 - q1
    uqsub8      r7, r10, r11                ; q1 - p1
    uqsub8      r11, r12, r9                ; p0 - q0
    uqsub8      r12, r9, r12                ; q0 - p0
    orr         r6, r6, r7                  ; abs (p1-q1)
    ldr         r7, c0x7F7F7F7F
    orr         r12, r11, r12               ; abs (p0-q0)
    ldr         r11, [src], pstep           ; q2
    uqadd8      r12, r12, r12               ; abs (p0-q0) * 2
    and         r6, r7, r6, lsr #1          ; abs (p1-q1) / 2
    uqsub8      r7, r9, r10                 ; q0 - q1
    uqadd8      r12, r12, r6                ; abs (p0-q0)*2 + abs (p1-q1)/2
    uqsub8      r6, r10, r9                 ; q1 - q0
    uqsub8      r12, r12, r4                ; compare to blimit (r4)
    uqsub8      r9, r11, r10                ; q2 - q1

    orr         lr, lr, r12

    ldr         r12, [src], pstep           ; q3
    uqsub8      r10, r10, r11               ; q1 - q2
    orr         r6, r7, r6                  ; abs (q1-q0)
    orr         r10, r9, r10                ; abs (q2-q1)
    uqsub8      r7, r6, r2                  ; compare to limit
    uqsub8      r10, r10, r2                ; compare to limit
    uqsub8      r6, r6, r3                  ; compare to thresh  -- save r6 for later
    orr         lr, lr, r7
    orr         lr, lr, r10

    uqsub8      r10, r12, r11               ; q3 - q2
    uqsub8      r9, r11, r12                ; q2 - q3

    mvn         r11, #0                     ; r11 == -1

    orr         r10, r10, r9                ; abs (q3-q2)
    uqsub8      r10, r10, r2                ; compare to limit

    mov         r12, #0
    orr         lr, lr, r10
    sub         src, src, pstep, lsl #2     ; src back at the q0 row

    ; usub8 sets GE for a lane when 0 >= lane, i.e. when the lane is zero;
    ; sel then yields 0xFF for lanes to be filtered and 0x00 otherwise.
    usub8       lr, r12, lr                 ; use usub8 instead of ssub8
    sel         lr, r11, r12                ; filter mask: lr

    cmp         lr, #0
    beq         hskip_filter                 ; skip filtering

    sub         src, src, pstep, lsl #1     ; src -= 2 * pstep: now at the p1 row

    ;vp8_hevmask() function
    ;calculate high edge variance
    orr         r10, r6, r8                 ; calculate vp8_hevmask

    ldr         r7, [src], pstep            ; p1

    usub8       r10, r12, r10               ; use usub8 instead of ssub8
    sel         r6, r12, r11                ; obtain vp8_hevmask: r6 (0xFF where over thresh)

    ;vp8_filter() function
    ldr         r8, [src], pstep            ; p0
    ldr         r12, c0x80808080
    ldr         r9, [src], pstep            ; q0
    ldr         r10, [src], pstep           ; q1

    eor         r7, r7, r12                 ; p1 offset to convert to a signed value
    eor         r8, r8, r12                 ; p0 offset to convert to a signed value
    eor         r9, r9, r12                 ; q0 offset to convert to a signed value
    eor         r10, r10, r12               ; q1 offset to convert to a signed value

    str         r9, [sp]                    ; store qs0 temporarily
    str         r8, [sp, #4]                ; store ps0 temporarily
    str         r10, [sp, #8]               ; store qs1 temporarily
    str         r7, [sp, #12]               ; store ps1 temporarily

    qsub8       r7, r7, r10                 ; vp8_signed_char_clamp(ps1-qs1)
    qsub8       r8, r9, r8                  ; r8 = vp8_signed_char_clamp(qs0 - ps0)

    and         r7, r7, r6                  ; vp8_filter (r7) &= hev

    ; three saturating adds of r8:
    ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0))
    qadd8       r7, r7, r8
    ldr         r9, c0x03030303             ; r9 = 3 --modified for vp8

    qadd8       r7, r7, r8
    ldr         r10, c0x04040404

    qadd8       r7, r7, r8
    and         r7, r7, lr                  ; vp8_filter &= mask;

    ;modify code for vp8 -- Filter1 = vp8_filter (r7)
    qadd8       r8 , r7 , r9                ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
    qadd8       r7 , r7 , r10               ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)

    ; shadd8 with zero is a per-lane arithmetic shift right by one
    mov         r9, #0
    shadd8      r8 , r8 , r9                ; Filter2 >>= 3
    shadd8      r7 , r7 , r9                ; vp8_filter >>= 3
    shadd8      r8 , r8 , r9
    shadd8      r7 , r7 , r9
    shadd8      lr , r8 , r9                ; lr: Filter2
    shadd8      r7 , r7 , r9                ; r7: filter

    ;calculate output
    ldr         r8, [sp]                    ; load qs0
    ldr         r9, [sp, #4]                ; load ps0

    ldr         r10, c0x01010101

    qsub8       r8 ,r8, r7                  ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
    qadd8       r9, r9, lr                  ; u = vp8_signed_char_clamp(ps0 + Filter2)

    ;end of modification for vp8

    mov         lr, #0
    sadd8       r7, r7 , r10                ; vp8_filter += 1
    shadd8      r7, r7, lr                  ; vp8_filter >>= 1

    ldr         r11, [sp, #12]              ; load ps1
    ldr         r10, [sp, #8]               ; load qs1

    bic         r7, r7, r6                  ; vp8_filter &= ~hev
    sub         src, src, pstep, lsl #2     ; src back at the p1 row for the stores

    qadd8       r11, r11, r7                ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
    qsub8       r10, r10,r7                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)

    eor         r11, r11, r12               ; *op1 = u^0x80
    str         r11, [src], pstep           ; store op1
    eor         r9, r9, r12                 ; *op0 = u^0x80
    str         r9, [src], pstep            ; store op0 result
    eor         r8, r8, r12                 ; *oq0 = u^0x80
    str         r8, [src], pstep            ; store oq0 result
    eor         r10, r10, r12               ; *oq1 = u^0x80
    str         r10, [src], pstep           ; store oq1

    sub         src, src, pstep, lsl #1     ; src back at the q0 row

|hskip_filter|
    add         src, src, #4                ; advance to the next 4 pixels
    sub         src, src, pstep, lsl #2     ; and back up to the p3 row

    subs        count, count, #1

    ; preload the first three rows of the next pass only if one follows
    ldrne       r9, [src], pstep            ; p3
    ldrne       r10, [src], pstep           ; p2
    ldrne       r11, [src], pstep           ; p1

    bne         Hnext8

    add         sp, sp, #16                 ; release temp buffer
    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_loop_filter_horizontal_edge_armv6|


;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_mbloop_filter_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    stmdb       sp!, {r4 - r11, lr}

    sub         src, src, pstep, lsl #2     ; move src pointer down by 4 lines
    ldr         count, [sp, #40]            ; count for 8-in-parallel
    ldr         r6, [sp, #36]               ; load thresh address
    sub         sp, sp, #16                 ; create temp buffer

    ldr         r9, [src], pstep            ; p3
    ldrb        r4, [r2]                    ; blimit
    ldr         r10, [src], pstep           ; p2
    ldrb        r2, [r3]                    ; limit
    ldr         r11, [src], pstep           ; p1
    orr         r4, r4, r4, lsl #8
    ldrb        r3, [r6]                    ; thresh
285233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r2, r2, r2, lsl #8 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov count, count, lsl #1 ; 4-in-parallel 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r4, r4, r4, lsl #16 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r3, r3, r3, lsl #8 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r2, r2, r2, lsl #16 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r3, r3, r3, lsl #16 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan|MBHnext8| 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; vp8_filter_mask() function 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; calculate breakout conditions 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r12, [src], pstep ; p0 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r9, r10 ; p3 - p2 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r10, r9 ; p2 - p3 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r8, r10, r11 ; p2 - p1 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r11, r10 ; p1 - p2 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r6, r7 ; abs (p3-p2) 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r8, r8, r10 ; abs (p2-p1) 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 lr, r6, r2 ; compare to limit. 
lr: vp8_filter_mask 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r8, r8, r2 ; compare to limit 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r11, r12 ; p1 - p0 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r8 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r12, r11 ; p0 - p1 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [src], pstep ; q0 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r10, [src], pstep ; q1 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r6, r7 ; abs (p1-p0) 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r6, r2 ; compare to limit 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r8, r6, r3 ; compare to thresh -- save r8 for later 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r7 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r11, r10 ; p1 - q1 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r10, r11 ; q1 - p1 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r11, r12, r9 ; p0 - q0 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r12, r9, r12 ; q0 - p0 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r6, r7 ; abs (p1-q1) 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r7, c0x7F7F7F7F 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r12, r11, r12 ; abs (p0-q0) 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [src], pstep ; q2 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqadd8 r12, r12, r12 ; abs (p0-q0) * 2 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan and r6, r7, r6, lsr #1 ; abs (p1-q1) / 2 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r9, r10 ; q0 - q1 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqadd8 r12, r12, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r10, r9 ; q1 - q0 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 
r12, r12, r4 ; compare to flimit 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r9, r11, r10 ; q2 - q1 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r12 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r12, [src], pstep ; q3 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r10, r11 ; q1 - q2 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r7, r6 ; abs (q1-q0) 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r9, r10 ; abs (q2-q1) 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r6, r2 ; compare to limit 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r10, r2 ; compare to limit 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r6, r3 ; compare to thresh -- save r6 for later 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r7 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r10 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r12, r11 ; q3 - q2 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r9, r11, r12 ; q2 - q3 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan 350233d2500723e5594f3e7c70896ffeeef32b9c950ywan mvn r11, #0 ; r11 == -1 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r10, r9 ; abs (q3-q2) 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r10, r2 ; compare to limit 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r12, #0 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r10 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan 359233d2500723e5594f3e7c70896ffeeef32b9c950ywan usub8 lr, r12, lr ; use usub8 instead of ssub8 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan sel lr, r11, 
r12 ; filter mask: lr 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan cmp lr, #0 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan beq mbhskip_filter ; skip filtering 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;vp8_hevmask() function 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;calculate high edge variance 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 ; move src pointer down by 6 lines 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #1 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r8 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r7, [src], pstep ; p1 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan usub8 r10, r12, r10 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan sel r6, r12, r11 ; hev mask: r6 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;vp8_mbfilter() function 377233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;p2, q2 are only needed at the end. Don't need to load them in now. 
378233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r8, [src], pstep ; p0 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r12, c0x80808080 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [src], pstep ; q0 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r10, [src] ; q1 382233d2500723e5594f3e7c70896ffeeef32b9c950ywan 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r7, r7, r12 ; ps1 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, r12 ; ps0 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r9, r9, r12 ; qs0 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, r12 ; qs1 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r12, r9, r8 ; vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) 389233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r7, [sp, #12] ; store ps1 temporarily 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r7, r7, r10 ; vp8_signed_char_clamp(ps1-qs1) 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r10, [sp, #8] ; store qs1 temporarily 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r7, r7, r12 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r9, [sp] ; store qs0 temporarily 394233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r7, r7, r12 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r8, [sp, #4] ; store ps0 temporarily 396233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r7, r7, r12 ; vp8_filter: r7 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan 398233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r10, c0x03030303 ; r10 = 3 --modified for vp8 399233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, c0x04040404 400233d2500723e5594f3e7c70896ffeeef32b9c950ywan 401233d2500723e5594f3e7c70896ffeeef32b9c950ywan and r7, r7, lr ; vp8_filter &= mask (lr is free) 402233d2500723e5594f3e7c70896ffeeef32b9c950ywan 403233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r12, r7 ; Filter2: r12 404233d2500723e5594f3e7c70896ffeeef32b9c950ywan and r12, r12, r6 
; Filter2 &= hev 405233d2500723e5594f3e7c70896ffeeef32b9c950ywan 406233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;modify code for vp8 407233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;save bottom 3 bits so that we round one side +4 and the other +3 408233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r8 , r12 , r9 ; Filter1 (r8) = vp8_signed_char_clamp(Filter2+4) 409233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r12 , r12 , r10 ; Filter2 (r12) = vp8_signed_char_clamp(Filter2+3) 410233d2500723e5594f3e7c70896ffeeef32b9c950ywan 411233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, #0 412233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r8 , r8 , r10 ; Filter1 >>= 3 413233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r12 , r12 , r10 ; Filter2 >>= 3 414233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r8 , r8 , r10 415233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r12 , r12 , r10 416233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r8 , r8 , r10 ; r8: Filter1 417233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r12 , r12 , r10 ; r12: Filter2 418233d2500723e5594f3e7c70896ffeeef32b9c950ywan 419233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [sp] ; load qs0 420233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [sp, #4] ; load ps0 421233d2500723e5594f3e7c70896ffeeef32b9c950ywan 422233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r9 , r9, r8 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1) 423233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r11, r11, r12 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2) 424233d2500723e5594f3e7c70896ffeeef32b9c950ywan 425233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;save bottom 3 bits so that we round one side +4 and the other +3 426233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;and r8, r12, r10 ; s = Filter2 & 7 (s: r8) 427233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qadd8 r12 , r12 , r9 ; Filter2 = vp8_signed_char_clamp(Filter2+4) 428233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;mov r10, #0 
429233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;shadd8 r12 , r12 , r10 ; Filter2 >>= 3 430233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;usub8 lr, r8, r9 ; s = (s==4)*-1 431233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;sel lr, r11, r10 432233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;shadd8 r12 , r12 , r10 433233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;usub8 r8, r9, r8 434233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;sel r8, r11, r10 435233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;ldr r9, [sp] ; load qs0 436233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;ldr r11, [sp, #4] ; load ps0 437233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;shadd8 r12 , r12 , r10 438233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;and r8, r8, lr ; -1 for each element that equals 4 439233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qadd8 r10, r8, r12 ; u = vp8_signed_char_clamp(s + Filter2) 440233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qsub8 r9 , r9, r12 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2) 441233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qadd8 r11, r11, r10 ; ps0 = vp8_signed_char_clamp(ps0 + u) 442233d2500723e5594f3e7c70896ffeeef32b9c950ywan 443233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;end of modification for vp8 444233d2500723e5594f3e7c70896ffeeef32b9c950ywan 445233d2500723e5594f3e7c70896ffeeef32b9c950ywan bic r12, r7, r6 ; vp8_filter &= ~hev ( r6 is free) 446233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;mov r12, r7 447233d2500723e5594f3e7c70896ffeeef32b9c950ywan 448233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;roughly 3/7th difference across boundary 449233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov lr, #0x1b ; 27 450233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0x3f ; 63 451233d2500723e5594f3e7c70896ffeeef32b9c950ywan 452233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r6, r12 453233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r10, r12, ror #8 454233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r8, r6, lr, r7 455233d2500723e5594f3e7c70896ffeeef32b9c950ywan 
smlatb r6, r6, lr, r7 456233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r7, r10, lr, r7 457233d2500723e5594f3e7c70896ffeeef32b9c950ywan smultb r10, r10, lr 458233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r8, #8, r8, asr #7 459233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r6, #8, r6, asr #7 460233d2500723e5594f3e7c70896ffeeef32b9c950ywan add r10, r10, #63 461233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r7, #8, r7, asr #7 462233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r10, #8, r10, asr #7 463233d2500723e5594f3e7c70896ffeeef32b9c950ywan 464233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, c0x80808080 465233d2500723e5594f3e7c70896ffeeef32b9c950ywan 466233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r8, r6, lsl #16 467233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r10, r7, r10, lsl #16 468233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r6, r6 469233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r10, r10 470233d2500723e5594f3e7c70896ffeeef32b9c950ywan 471233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep 472233d2500723e5594f3e7c70896ffeeef32b9c950ywan 473233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r10, lsl #8 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) 474233d2500723e5594f3e7c70896ffeeef32b9c950ywan 475233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r8, r9, r10 ; s = vp8_signed_char_clamp(qs0 - u) 476233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r10, r11, r10 ; s = vp8_signed_char_clamp(ps0 + u) 477233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, lr ; *oq0 = s^0x80 478233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r8, [src] ; store *oq0 479233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep 480233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, lr ; *op0 = s^0x80 481233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r10, [src] ; store *op0 482233d2500723e5594f3e7c70896ffeeef32b9c950ywan 483233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;roughly 2/7th 
difference across boundary 484233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov lr, #0x12 ; 18 485233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0x3f ; 63 486233d2500723e5594f3e7c70896ffeeef32b9c950ywan 487233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r6, r12 488233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r10, r12, ror #8 489233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r8, r6, lr, r7 490233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r6, r6, lr, r7 491233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r9, r10, lr, r7 492233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r10, r10, lr, r7 493233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r8, #8, r8, asr #7 494233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r6, #8, r6, asr #7 495233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r9, #8, r9, asr #7 496233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r10, #8, r10, asr #7 497233d2500723e5594f3e7c70896ffeeef32b9c950ywan 498233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, c0x80808080 499233d2500723e5594f3e7c70896ffeeef32b9c950ywan 500233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r8, r6, lsl #16 501233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r10, r9, r10, lsl #16 502233d2500723e5594f3e7c70896ffeeef32b9c950ywan 503233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [sp, #8] ; load qs1 504233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [sp, #12] ; load ps1 505233d2500723e5594f3e7c70896ffeeef32b9c950ywan 506233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r6, r6 507233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r10, r10 508233d2500723e5594f3e7c70896ffeeef32b9c950ywan 509233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep 510233d2500723e5594f3e7c70896ffeeef32b9c950ywan 511233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r10, lsl #8 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) 512233d2500723e5594f3e7c70896ffeeef32b9c950ywan 513233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 
r11, r11, r10 ; s = vp8_signed_char_clamp(ps1 + u) 514233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r8, r9, r10 ; s = vp8_signed_char_clamp(qs1 - u) 515233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r11, r11, lr ; *op1 = s^0x80 516233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r11, [src], pstep ; store *op1 517233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, lr ; *oq1 = s^0x80 518233d2500723e5594f3e7c70896ffeeef32b9c950ywan add src, src, pstep, lsl #1 519233d2500723e5594f3e7c70896ffeeef32b9c950ywan 520233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0x3f ; 63 521233d2500723e5594f3e7c70896ffeeef32b9c950ywan 522233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r8, [src], pstep ; store *oq1 523233d2500723e5594f3e7c70896ffeeef32b9c950ywan 524233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;roughly 1/7th difference across boundary 525233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov lr, #0x9 ; 9 526233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [src] ; load q2 527233d2500723e5594f3e7c70896ffeeef32b9c950ywan 528233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r6, r12 529233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r10, r12, ror #8 530233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r8, r6, lr, r7 531233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r6, r6, lr, r7 532233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r12, r10, lr, r7 533233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r10, r10, lr, r7 534233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r8, #8, r8, asr #7 535233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r6, #8, r6, asr #7 536233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r12, #8, r12, asr #7 537233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r10, #8, r10, asr #7 538233d2500723e5594f3e7c70896ffeeef32b9c950ywan 539233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 540233d2500723e5594f3e7c70896ffeeef32b9c950ywan 541233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r8, r6, lsl #16 
542233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r10, r12, r10, lsl #16 543233d2500723e5594f3e7c70896ffeeef32b9c950ywan 544233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep 545233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, c0x80808080 546233d2500723e5594f3e7c70896ffeeef32b9c950ywan 547233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [src] ; load p2 548233d2500723e5594f3e7c70896ffeeef32b9c950ywan 549233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r6, r6 550233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r10, r10 551233d2500723e5594f3e7c70896ffeeef32b9c950ywan 552233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r9, r9, lr 553233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r11, r11, lr 554233d2500723e5594f3e7c70896ffeeef32b9c950ywan 555233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r10, lsl #8 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) 556233d2500723e5594f3e7c70896ffeeef32b9c950ywan 557233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r8, r11, r10 ; s = vp8_signed_char_clamp(ps2 + u) 558233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r10, r9, r10 ; s = vp8_signed_char_clamp(qs2 - u) 559233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, lr ; *op2 = s^0x80 560233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r8, [src], pstep, lsl #2 ; store *op2 561233d2500723e5594f3e7c70896ffeeef32b9c950ywan add src, src, pstep 562233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, lr ; *oq2 = s^0x80 563233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r10, [src], pstep, lsl #1 ; store *oq2 564233d2500723e5594f3e7c70896ffeeef32b9c950ywan 565233d2500723e5594f3e7c70896ffeeef32b9c950ywan|mbhskip_filter| 566233d2500723e5594f3e7c70896ffeeef32b9c950ywan add src, src, #4 567233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #3 568233d2500723e5594f3e7c70896ffeeef32b9c950ywan subs count, count, #1 569233d2500723e5594f3e7c70896ffeeef32b9c950ywan 570233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne r9, 
[src], pstep ; p3 571233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne r10, [src], pstep ; p2 572233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne r11, [src], pstep ; p1 573233d2500723e5594f3e7c70896ffeeef32b9c950ywan 574233d2500723e5594f3e7c70896ffeeef32b9c950ywan bne MBHnext8 575233d2500723e5594f3e7c70896ffeeef32b9c950ywan 576233d2500723e5594f3e7c70896ffeeef32b9c950ywan add sp, sp, #16 577233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldmia sp!, {r4 - r11, pc} 578233d2500723e5594f3e7c70896ffeeef32b9c950ywan ENDP ; |vp8_mbloop_filter_horizontal_edge_armv6| 579233d2500723e5594f3e7c70896ffeeef32b9c950ywan 580233d2500723e5594f3e7c70896ffeeef32b9c950ywan

;-----------------------------------------------------------------------------
; vp8_loop_filter_vertical_edge_armv6
;
; Loop filter across a vertical edge. For every row it reads the eight pixels
; p3 p2 p1 p0 | q0 q1 q2 q3 that straddle the edge and rewrites p1, p0, q0, q1.
; Four rows are handled per loop iteration (one byte lane per row), and the
; loop runs 2 * count times, i.e. count * 8 rows in total.
;
; In:    r0 (src)    address of q0 (first pixel right of the edge) in row 0
;        r1 (pstep)  byte offset from one row to the next
;        r2          address of the blimit byte
;        r3          address of the limit byte
;        [sp, #0]    address of the thresh byte      (first stack argument)
;        [sp, #4]    count                           (second stack argument)
;        Presumably the C prototype is
;          (unsigned char *src, int pitch, const unsigned char *blimit,
;           const unsigned char *limit, const unsigned char *thresh, int count)
;        -- confirm against the C declaration.
; Out:   nothing in registers; the picture is modified in place.
; Clobb: r0, r2, r3, r12, NZCV and GE flags. r4 - r11 are saved and restored;
;        lr is saved on entry and popped straight into pc on return.
; Stack: 36 bytes of saved registers plus a 16-byte scratch buffer at [sp].
;
; Register roles inside the loop:
;        r2 = limit  replicated into all four bytes
;        r3 = thresh replicated into all four bytes
;        r4 = blimit replicated into all four bytes
;        r5 (count) = remaining 4-row iterations
;
; NOTE(review): the word loads at src - 4 and src assume those addresses are
; word aligned or that unaligned word access is enabled, and the halfword
; stores assume little-endian byte order -- confirm for the target.
;-----------------------------------------------------------------------------
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_vertical_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    stmdb       sp!, {r4 - r11, lr}         ; 9 registers = 36 bytes

    sub         src, src, #4                ; src -= 4: point at p3 of row 0
    ldr         count, [sp, #40]            ; second stack argument: count
    ldr         r12, [sp, #36]              ; first stack argument: thresh address
    sub         sp, sp, #16                 ; create 16-byte temp buffer

    ; Load p3..p0 of rows 0-3 while fetching and replicating the thresholds.
    ldr         r6, [src], pstep            ; row 0: p3 p2 p1 p0
    ldrb        r4, [r2]                    ; blimit
    ldr         r7, [src], pstep            ; row 1
    ldrb        r2, [r3]                    ; limit
    ldr         r8, [src], pstep            ; row 2
    orr         r4, r4, r4, lsl #8
    ldrb        r3, [r12]                   ; thresh
    orr         r2, r2, r2, lsl #8
    ldr         lr, [src], pstep            ; row 3
    mov         count, count, lsl #1        ; 4 rows per iteration -> 2 * count iterations
    orr         r4, r4, r4, lsl #16         ; r4 = blimit in every byte
    orr         r3, r3, r3, lsl #8
    orr         r2, r2, r2, lsl #16         ; r2 = limit in every byte
    orr         r3, r3, r3, lsl #16         ; r3 = thresh in every byte

|Vnext8|

    ; vp8_filter_mask() function
    ; calculate breakout conditions
    ; transpose the source data for 4-in-parallel operation:
    ; r9 = p3, r10 = p2, r11 = p1, r12 = p0 (one byte per row)
    TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12

    ; abs(a - b) is built as uqsub8(a, b) | uqsub8(b, a); a following
    ; uqsub8 against the limit leaves a non-zero byte only when it is exceeded.
    uqsub8      r7, r9, r10                 ; p3 - p2
    uqsub8      r8, r10, r9                 ; p2 - p3
    uqsub8      r9, r10, r11                ; p2 - p1
    uqsub8      r10, r11, r10               ; p1 - p2
    orr         r7, r7, r8                  ; abs (p3-p2)
    orr         r10, r9, r10                ; abs (p2-p1)
    uqsub8      lr, r7, r2                  ; compare to limit. lr: vp8_filter_mask
    uqsub8      r10, r10, r2                ; compare to limit

    sub         src, src, pstep, lsl #2     ; rewind src by 4 rows

    orr         lr, lr, r10

    uqsub8      r6, r11, r12                ; p1 - p0
    uqsub8      r7, r12, r11                ; p0 - p1
    add         src, src, #4                ; src += 4: point at q0 of the first row
    orr         r6, r6, r7                  ; abs (p1-p0)
    str         r11, [sp, #12]              ; save p1
    uqsub8      r10, r6, r2                 ; compare to limit
    uqsub8      r11, r6, r3                 ; compare to thresh
    orr         lr, lr, r10

    ; transpose uses 8 regs (r6 - r12 and lr). Need to save reg values now
    ; transpose the source data for 4-in-parallel operation
    ldr         r6, [src], pstep            ; row 0: q0 q1 q2 q3
    str         r11, [sp]                   ; save abs(p1-p0) vs thresh result (for hev)
    ldr         r7, [src], pstep            ; row 1
    str         r12, [sp, #4]               ; save p0 before loading q0 - q3 data
    ldr         r8, [src], pstep            ; row 2
    str         lr, [sp, #8]                ; save limit accumulator
    ldr         lr, [src], pstep            ; row 3

    ; r9 = q0, r10 = q1, r11 = q2, r12 = q3 (one byte per row)
    TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12

    ldr         lr, [sp, #8]                ; load back (f)limit accumulator

    uqsub8      r6, r12, r11                ; q3 - q2
    uqsub8      r7, r11, r12                ; q2 - q3
    uqsub8      r12, r11, r10               ; q2 - q1
    uqsub8      r11, r10, r11               ; q1 - q2
    orr         r6, r6, r7                  ; abs (q3-q2)
    orr         r7, r12, r11                ; abs (q2-q1)
    uqsub8      r6, r6, r2                  ; compare to limit
    uqsub8      r7, r7, r2                  ; compare to limit
    ldr         r11, [sp, #4]               ; load back p0
    ldr         r12, [sp, #12]              ; load back p1
    orr         lr, lr, r6
    orr         lr, lr, r7

    uqsub8      r6, r11, r9                 ; p0 - q0
    uqsub8      r7, r9, r11                 ; q0 - p0
    uqsub8      r8, r12, r10                ; p1 - q1
    uqsub8      r11, r10, r12               ; q1 - p1
    orr         r6, r6, r7                  ; abs (p0-q0)
    ldr         r7, c0x7F7F7F7F
    orr         r8, r8, r11                 ; abs (p1-q1)
    uqadd8      r6, r6, r6                  ; abs (p0-q0) * 2
    and         r8, r7, r8, lsr #1          ; abs (p1-q1) / 2 (mask drops bits shifted in from the next byte)
    uqsub8      r11, r10, r9                ; q1 - q0
    uqadd8      r6, r8, r6                  ; abs (p0-q0)*2 + abs (p1-q1)/2
    uqsub8      r12, r9, r10                ; q0 - q1
    uqsub8      r6, r6, r4                  ; compare to blimit (r4)

    orr         r9, r11, r12                ; abs (q1-q0)
    uqsub8      r8, r9, r2                  ; compare to limit
    uqsub8      r10, r9, r3                 ; compare to thresh
    orr         lr, lr, r6
    orr         lr, lr, r8

    mvn         r11, #0                     ; r11 == -1
    mov         r12, #0

    usub8       lr, r12, lr                 ; GE[i] set where accumulator byte i == 0
    ldr         r9, [sp]                    ; load the saved abs(p1-p0) vs thresh result
    sel         lr, r11, r12                ; filter mask: lr (0xFF = filter this row)

    cmp         lr, #0
    beq         vskip_filter                ; no row of this group needs filtering

    ;vp8_hevmask() function
    ;calculate high edge variance

    sub         src, src, pstep, lsl #2     ; rewind src by 4 rows (q0 column)

    orr         r9, r9, r10                 ; non-zero where abs(p1-p0) or abs(q1-q0) > thresh

    ldrh        r7, [src, #-2]              ; row 0: p1 p0
    ldrh        r8, [src], pstep            ; row 0: q0 q1

    usub8       r9, r12, r9                 ; GE[i] set where byte i == 0
    sel         r6, r12, r11                ; hev mask: r6 (0xFF = high edge variance)

    ;vp8_filter() function
    ; reload p1 p0 q0 q1 of rows 0-3 into r12, r11, r6, lr
    ldrh        r9, [src, #-2]              ; row 1: p1 p0
    ldrh        r10, [src], pstep           ; row 1: q0 q1

    pkhbt       r12, r7, r8, lsl #16        ; row 0

    ldrh        r7, [src, #-2]              ; row 2: p1 p0
    ldrh        r8, [src], pstep            ; row 2: q0 q1

    pkhbt       r11, r9, r10, lsl #16       ; row 1

    ldrh        r9, [src, #-2]              ; row 3: p1 p0
    ldrh        r10, [src], pstep           ; row 3: q0 q1

    ; Transpose needs 8 regs (r6 - r12, and lr). Save r6 and lr first
    str         r6, [sp]                    ; hev mask
    str         lr, [sp, #4]                ; filter mask

    pkhbt       r6, r7, r8, lsl #16         ; row 2
    pkhbt       lr, r9, r10, lsl #16        ; row 3

    ;transpose r12, r11, r6, lr to r7, r8, r9, r10 (p1, p0, q0, q1)
    TRANSPOSE_MATRIX r12, r11, r6, lr, r7, r8, r9, r10

    ;load back hev_mask r6 and filter_mask lr
    ldr         r12, c0x80808080
    ldr         r6, [sp]
    ldr         lr, [sp, #4]

    eor         r7, r7, r12                 ; p1 offset to convert to a signed value
    eor         r8, r8, r12                 ; p0 offset to convert to a signed value
    eor         r9, r9, r12                 ; q0 offset to convert to a signed value
    eor         r10, r10, r12               ; q1 offset to convert to a signed value

    str         r9, [sp]                    ; store qs0 temporarily
    str         r8, [sp, #4]                ; store ps0 temporarily
    str         r10, [sp, #8]               ; store qs1 temporarily
    str         r7, [sp, #12]               ; store ps1 temporarily

    qsub8       r7, r7, r10                 ; vp8_filter = vp8_signed_char_clamp(ps1 - qs1)
    qsub8       r8, r9, r8                  ; r8 = vp8_signed_char_clamp(qs0 - ps0)

    and         r7, r7, r6                  ; vp8_filter (r7) &= hev

    ; vp8_filter = clamp(vp8_filter + 3 * (qs0 - ps0)), saturating at each add
    qadd8       r7, r7, r8
    ldr         r9, c0x03030303             ; r9 = 3 --modified for vp8

    qadd8       r7, r7, r8
    ldr         r10, c0x04040404

    qadd8       r7, r7, r8
    ;mvn         r11, #0                     ; r11 == -1

    and         r7, r7, lr                  ; vp8_filter &= mask

    ;modify code for vp8 -- Filter1 = vp8_filter (r7)
    qadd8       r8, r7, r9                  ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
    qadd8       r7, r7, r10                 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)

    ; shadd8 with zero is a per-byte arithmetic shift right by one; three of
    ; them give >> 3.
    mov         r9, #0
    shadd8      r8, r8, r9                  ; Filter2 >>= 3
    shadd8      r7, r7, r9                  ; vp8_filter >>= 3
    shadd8      r8, r8, r9
    shadd8      r7, r7, r9
    shadd8      lr, r8, r9                  ; lr: filter2
    shadd8      r7, r7, r9                  ; r7: filter

    ; Superseded by the vp8 modification above; kept for reference.
    ;usub8      lr, r8, r10                 ; s = (s==4)*-1
    ;sel        lr, r11, r9
    ;usub8      r8, r10, r8
    ;sel        r8, r11, r9
    ;and        r8, r8, lr                  ; -1 for each element that equals 4 -- r8: s

    ;calculate output
    ;qadd8      lr, r8, r7                  ; u = vp8_signed_char_clamp(s + vp8_filter)

    ldr         r8, [sp]                    ; load qs0
    ldr         r9, [sp, #4]                ; load ps0

    ldr         r10, c0x01010101

    qsub8       r8, r8, r7                  ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
    qadd8       r9, r9, lr                  ; u = vp8_signed_char_clamp(ps0 + Filter2)
    ;end of modification for vp8

    eor         r8, r8, r12                 ; q0 back to unsigned
    eor         r9, r9, r12                 ; p0 back to unsigned

    mov         lr, #0

    sadd8       r7, r7, r10                 ; vp8_filter += 1
    shadd8      r7, r7, lr                  ; vp8_filter >>= 1

    ldr         r10, [sp, #8]               ; load qs1
    ldr         r11, [sp, #12]              ; load ps1

    bic         r7, r7, r6                  ; r7: vp8_filter &= ~hev

    qsub8       r10, r10, r7                ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
    qadd8       r11, r11, r7                ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
    eor         r10, r10, r12               ; q1 back to unsigned
    eor         r11, r11, r12               ; p1 back to unsigned

    sub         src, src, pstep, lsl #2     ; rewind src by 4 rows (q0 column)

    ;Use the TRANSPOSE_MATRIX macro to turn the per-pixel registers back into
    ;per-row words. Inputs a0..a3 are p1 (r11), p0 (r9), q0 (r8), q1 (r10);
    ;outputs b0..b3 (r6, r7, r12, lr) are rows 0..3, most significant byte first:
    ;b0: 03 02 01 00
    ;b1: 13 12 11 10
    ;b2: 23 22 21 20
    ;b3: 33 32 31 30
    ;    q1 q0 p0 p1
    ;   (a3 a2 a1 a0)
    TRANSPOSE_MATRIX r11, r9, r8, r10, r6, r7, r12, lr

    ; Per row: low halfword (p1 p0) goes to src - 2, high halfword (q0 q1) to src.
    strh        r6, [src, #-2]              ; store the result
    mov         r6, r6, lsr #16
    strh        r6, [src], pstep

    strh        r7, [src, #-2]
    mov         r7, r7, lsr #16
    strh        r7, [src], pstep

    strh        r12, [src, #-2]
    mov         r12, r12, lsr #16
    strh        r12, [src], pstep

    strh        lr, [src, #-2]
    mov         lr, lr, lsr #16
    strh        lr, [src], pstep

|vskip_filter|
    ; src is at the q0 column of the next 4-row group on both paths.
    sub         src, src, #4                ; back to the p3 column
    subs        count, count, #1

    ldrne       r6, [src], pstep            ; load p3..p0 of the next four rows
    ldrne       r7, [src], pstep
    ldrne       r8, [src], pstep
    ldrne       lr, [src], pstep

    bne         Vnext8

    add         sp, sp, #16                 ; release temp buffer

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_loop_filter_vertical_edge_armv6|

852233d2500723e5594f3e7c70896ffeeef32b9c950ywan 853233d2500723e5594f3e7c70896ffeeef32b9c950ywan 854233d2500723e5594f3e7c70896ffeeef32b9c950ywan 855233d2500723e5594f3e7c70896ffeeef32b9c950ywan;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 856233d2500723e5594f3e7c70896ffeeef32b9c950ywan|vp8_mbloop_filter_vertical_edge_armv6| PROC
857233d2500723e5594f3e7c70896ffeeef32b9c950ywan;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 858233d2500723e5594f3e7c70896ffeeef32b9c950ywan stmdb sp!, {r4 - r11, lr} 859233d2500723e5594f3e7c70896ffeeef32b9c950ywan 860233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, #4 ; move src pointer down by 4 861233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr count, [sp, #40] ; count for 8-in-parallel 862233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r12, [sp, #36] ; load thresh address 863233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] ; preload for next block 864233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub sp, sp, #16 ; create temp buffer 865233d2500723e5594f3e7c70896ffeeef32b9c950ywan 866233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r6, [src], pstep ; load source data 867233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r4, [r2] ; blimit 868233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] 869233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r7, [src], pstep 870233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r2, [r3] ; limit 871233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] 872233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r8, [src], pstep 873233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r4, r4, r4, lsl #8 874233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r3, [r12] ; thresh 875233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r2, r2, r2, lsl #8 876233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] 877233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, [src], pstep 878233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov count, count, lsl #1 ; 4-in-parallel 879233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r4, r4, r4, lsl #16 880233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r3, r3, r3, lsl #8 881233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r2, r2, r2, lsl #16 882233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r3, r3, r3, lsl #16 
883233d2500723e5594f3e7c70896ffeeef32b9c950ywan 884233d2500723e5594f3e7c70896ffeeef32b9c950ywan|MBVnext8| 885233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; vp8_filter_mask() function 886233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; calculate breakout conditions 887233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; transpose the source data for 4-in-parallel operation 888233d2500723e5594f3e7c70896ffeeef32b9c950ywan TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12 889233d2500723e5594f3e7c70896ffeeef32b9c950ywan 890233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r9, r10 ; p3 - p2 891233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r8, r10, r9 ; p2 - p3 892233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r9, r10, r11 ; p2 - p1 893233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r11, r10 ; p1 - p2 894233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r7, r7, r8 ; abs (p3-p2) 895233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r9, r10 ; abs (p2-p1) 896233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 lr, r7, r2 ; compare to limit. 
lr: vp8_filter_mask 897233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r10, r2 ; compare to limit 898233d2500723e5594f3e7c70896ffeeef32b9c950ywan 899233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines 900233d2500723e5594f3e7c70896ffeeef32b9c950ywan 901233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r10 902233d2500723e5594f3e7c70896ffeeef32b9c950ywan 903233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r11, r12 ; p1 - p0 904233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r12, r11 ; p0 - p1 905233d2500723e5594f3e7c70896ffeeef32b9c950ywan add src, src, #4 ; move src pointer up by 4 906233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r6, r7 ; abs (p1-p0) 907233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r11, [sp, #12] ; save p1 908233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r6, r2 ; compare to limit 909233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r11, r6, r3 ; compare to thresh 910233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r10 911233d2500723e5594f3e7c70896ffeeef32b9c950ywan 912233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; transpose uses 8 regs(r6 - r12 and lr). 
Need to save reg value now 913233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; transpose the source data for 4-in-parallel operation 914233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r6, [src], pstep ; load source data 915233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r11, [sp] ; push r11 to stack 916233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r7, [src], pstep 917233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r12, [sp, #4] ; save current reg before load q0 - q3 data 918233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r8, [src], pstep 919233d2500723e5594f3e7c70896ffeeef32b9c950ywan str lr, [sp, #8] 920233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, [src], pstep 921233d2500723e5594f3e7c70896ffeeef32b9c950ywan 922233d2500723e5594f3e7c70896ffeeef32b9c950ywan 923233d2500723e5594f3e7c70896ffeeef32b9c950ywan TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12 924233d2500723e5594f3e7c70896ffeeef32b9c950ywan 925233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, [sp, #8] ; load back (f)limit accumulator 926233d2500723e5594f3e7c70896ffeeef32b9c950ywan 927233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r12, r11 ; q3 - q2 928233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r11, r12 ; q2 - q3 929233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r12, r11, r10 ; q2 - q1 930233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r11, r10, r11 ; q1 - q2 931233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r6, r7 ; abs (q3-q2) 932233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r7, r12, r11 ; abs (q2-q1) 933233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r6, r2 ; compare to limit 934233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r7, r2 ; compare to limit 935233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [sp, #4] ; load back p0 936233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r12, [sp, #12] ; load back p1 937233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r6 938233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, 
r7 939233d2500723e5594f3e7c70896ffeeef32b9c950ywan 940233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r11, r9 ; p0 - q0 941233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r7, r9, r11 ; q0 - p0 942233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r8, r12, r10 ; p1 - q1 943233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r11, r10, r12 ; q1 - p1 944233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r6, r6, r7 ; abs (p0-q0) 945233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r7, c0x7F7F7F7F 946233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r8, r8, r11 ; abs (p1-q1) 947233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqadd8 r6, r6, r6 ; abs (p0-q0) * 2 948233d2500723e5594f3e7c70896ffeeef32b9c950ywan and r8, r7, r8, lsr #1 ; abs (p1-q1) / 2 949233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r11, r10, r9 ; q1 - q0 950233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqadd8 r6, r8, r6 ; abs (p0-q0)*2 + abs (p1-q1)/2 951233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r12, r9, r10 ; q0 - q1 952233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r6, r6, r4 ; compare to flimit 953233d2500723e5594f3e7c70896ffeeef32b9c950ywan 954233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r9, r11, r12 ; abs (q1-q0) 955233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r8, r9, r2 ; compare to limit 956233d2500723e5594f3e7c70896ffeeef32b9c950ywan uqsub8 r10, r9, r3 ; compare to thresh 957233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r6 958233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr lr, lr, r8 959233d2500723e5594f3e7c70896ffeeef32b9c950ywan 960233d2500723e5594f3e7c70896ffeeef32b9c950ywan mvn r11, #0 ; r11 == -1 961233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r12, #0 962233d2500723e5594f3e7c70896ffeeef32b9c950ywan 963233d2500723e5594f3e7c70896ffeeef32b9c950ywan usub8 lr, r12, lr 964233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [sp] ; load the compared result 965233d2500723e5594f3e7c70896ffeeef32b9c950ywan sel lr, r11, r12 ; filter mask: lr 
966233d2500723e5594f3e7c70896ffeeef32b9c950ywan 967233d2500723e5594f3e7c70896ffeeef32b9c950ywan cmp lr, #0 968233d2500723e5594f3e7c70896ffeeef32b9c950ywan beq mbvskip_filter ; skip filtering 969233d2500723e5594f3e7c70896ffeeef32b9c950ywan 970233d2500723e5594f3e7c70896ffeeef32b9c950ywan 971233d2500723e5594f3e7c70896ffeeef32b9c950ywan 972233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;vp8_hevmask() function 973233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;calculate high edge variance 974233d2500723e5594f3e7c70896ffeeef32b9c950ywan 975233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines 976233d2500723e5594f3e7c70896ffeeef32b9c950ywan 977233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r9, r9, r10 978233d2500723e5594f3e7c70896ffeeef32b9c950ywan 979233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r7, [src, #-2] 980233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r8, [src], pstep 981233d2500723e5594f3e7c70896ffeeef32b9c950ywan 982233d2500723e5594f3e7c70896ffeeef32b9c950ywan usub8 r9, r12, r9 983233d2500723e5594f3e7c70896ffeeef32b9c950ywan sel r6, r12, r11 ; hev mask: r6 984233d2500723e5594f3e7c70896ffeeef32b9c950ywan 985233d2500723e5594f3e7c70896ffeeef32b9c950ywan 986233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; vp8_mbfilter() function 987233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; p2, q2 are only needed at the end. Don't need to load them in now. 988233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; Transpose needs 8 regs(r6 - r12, and lr). 
Save r6 and lr first 989233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; load soure data to r6, r11, r12, lr 990233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r9, [src, #-2] 991233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r10, [src], pstep 992233d2500723e5594f3e7c70896ffeeef32b9c950ywan 993233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r12, r7, r8, lsl #16 994233d2500723e5594f3e7c70896ffeeef32b9c950ywan 995233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r7, [src, #-2] 996233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r8, [src], pstep 997233d2500723e5594f3e7c70896ffeeef32b9c950ywan 998233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r11, r9, r10, lsl #16 999233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1000233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r9, [src, #-2] 1001233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrh r10, [src], pstep 1002233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1003233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r6, [sp] ; save r6 1004233d2500723e5594f3e7c70896ffeeef32b9c950ywan str lr, [sp, #4] ; save lr 1005233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1006233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r7, r8, lsl #16 1007233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt lr, r9, r10, lsl #16 1008233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1009233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;transpose r12, r11, r6, lr to p1, p0, q0, q1 1010233d2500723e5594f3e7c70896ffeeef32b9c950ywan TRANSPOSE_MATRIX r12, r11, r6, lr, r7, r8, r9, r10 1011233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1012233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;load back hev_mask r6 and filter_mask lr 1013233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r12, c0x80808080 1014233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r6, [sp] 1015233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, [sp, #4] 1016233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1017233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r7, r7, r12 ; ps1 
1018233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, r12 ; ps0 1019233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r9, r9, r12 ; qs0 1020233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, r12 ; qs1 1021233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1022233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r12, r9, r8 ; vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) 1023233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r7, [sp, #12] ; store ps1 temporarily 1024233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r7, r7, r10 ; vp8_signed_char_clamp(ps1-qs1) 1025233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r10, [sp, #8] ; store qs1 temporarily 1026233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r7, r7, r12 1027233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r9, [sp] ; store qs0 temporarily 1028233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r7, r7, r12 1029233d2500723e5594f3e7c70896ffeeef32b9c950ywan str r8, [sp, #4] ; store ps0 temporarily 1030233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r7, r7, r12 ; vp8_filter: r7 1031233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1032233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r10, c0x03030303 ; r10 = 3 --modified for vp8 1033233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, c0x04040404 1034233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;mvn r11, #0 ; r11 == -1 1035233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1036233d2500723e5594f3e7c70896ffeeef32b9c950ywan and r7, r7, lr ; vp8_filter &= mask (lr is free) 1037233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1038233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r12, r7 ; Filter2: r12 1039233d2500723e5594f3e7c70896ffeeef32b9c950ywan and r12, r12, r6 ; Filter2 &= hev 1040233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1041233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;modify code for vp8 1042233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;save bottom 3 bits so that we round one side +4 and the other +3 1043233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r8 
, r12 , r9 ; Filter1 (r8) = vp8_signed_char_clamp(Filter2+4) 1044233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r12 , r12 , r10 ; Filter2 (r12) = vp8_signed_char_clamp(Filter2+3) 1045233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1046233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, #0 1047233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r8 , r8 , r10 ; Filter1 >>= 3 1048233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r12 , r12 , r10 ; Filter2 >>= 3 1049233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r8 , r8 , r10 1050233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r12 , r12 , r10 1051233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r8 , r8 , r10 ; r8: Filter1 1052233d2500723e5594f3e7c70896ffeeef32b9c950ywan shadd8 r12 , r12 , r10 ; r12: Filter2 1053233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1054233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [sp] ; load qs0 1055233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [sp, #4] ; load ps0 1056233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1057233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r9 , r9, r8 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1) 1058233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r11, r11, r12 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2) 1059233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1060233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;save bottom 3 bits so that we round one side +4 and the other +3 1061233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;and r8, r12, r10 ; s = Filter2 & 7 (s: r8) 1062233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qadd8 r12 , r12 , r9 ; Filter2 = vp8_signed_char_clamp(Filter2+4) 1063233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;mov r10, #0 1064233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;shadd8 r12 , r12 , r10 ; Filter2 >>= 3 1065233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;usub8 lr, r8, r9 ; s = (s==4)*-1 1066233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;sel lr, r11, r10 1067233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;shadd8 r12 , r12 , 
r10 1068233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;usub8 r8, r9, r8 1069233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;sel r8, r11, r10 1070233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;ldr r9, [sp] ; load qs0 1071233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;ldr r11, [sp, #4] ; load ps0 1072233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;shadd8 r12 , r12 , r10 1073233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;and r8, r8, lr ; -1 for each element that equals 4 1074233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qadd8 r10, r8, r12 ; u = vp8_signed_char_clamp(s + Filter2) 1075233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qsub8 r9 , r9, r12 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2) 1076233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;qadd8 r11, r11, r10 ; ps0 = vp8_signed_char_clamp(ps0 + u) 1077233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1078233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;end of modification for vp8 1079233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1080233d2500723e5594f3e7c70896ffeeef32b9c950ywan bic r12, r7, r6 ;vp8_filter &= ~hev ( r6 is free) 1081233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;mov r12, r7 1082233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1083233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;roughly 3/7th difference across boundary 1084233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov lr, #0x1b ; 27 1085233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0x3f ; 63 1086233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1087233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r6, r12 1088233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r10, r12, ror #8 1089233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r8, r6, lr, r7 1090233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r6, r6, lr, r7 1091233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r7, r10, lr, r7 1092233d2500723e5594f3e7c70896ffeeef32b9c950ywan smultb r10, r10, lr 1093233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r8, #8, r8, asr #7 
1094233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r6, #8, r6, asr #7 1095233d2500723e5594f3e7c70896ffeeef32b9c950ywan add r10, r10, #63 1096233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r7, #8, r7, asr #7 1097233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r10, #8, r10, asr #7 1098233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1099233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, c0x80808080 1100233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1101233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r8, r6, lsl #16 1102233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r10, r7, r10, lsl #16 1103233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r6, r6 1104233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r10, r10 1105233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1106233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines 1107233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1108233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r10, lsl #8 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) 1109233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1110233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r8, r9, r10 ; s = vp8_signed_char_clamp(qs0 - u) 1111233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r10, r11, r10 ; s = vp8_signed_char_clamp(ps0 + u) 1112233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, lr ; *oq0 = s^0x80 1113233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, lr ; *op0 = s^0x80 1114233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1115233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-1] ; store op0 result 1116233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src], pstep ; store oq0 result 1117233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1118233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1119233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-1] 1120233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src], pstep 
1121233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1122233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1123233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-1] 1124233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src], pstep 1125233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1126233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1127233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-1] 1128233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src], pstep 1129233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1130233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;roughly 2/7th difference across boundary 1131233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov lr, #0x12 ; 18 1132233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0x3f ; 63 1133233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1134233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r6, r12 1135233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r10, r12, ror #8 1136233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r8, r6, lr, r7 1137233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r6, r6, lr, r7 1138233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r9, r10, lr, r7 1139233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1140233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r10, r10, lr, r7 1141233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r8, #8, r8, asr #7 1142233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r6, #8, r6, asr #7 1143233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r9, #8, r9, asr #7 1144233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r10, #8, r10, asr #7 1145233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1146233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 ; move src pointer down by 4 lines 1147233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1148233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r8, r6, lsl #16 1149233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r10, r9, 
r10, lsl #16 1150233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1151233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r9, [sp, #8] ; load qs1 1152233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr r11, [sp, #12] ; load ps1 1153233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, c0x80808080 1154233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1155233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r6, r6 1156233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r10, r10 1157233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1158233d2500723e5594f3e7c70896ffeeef32b9c950ywan add src, src, #2 1159233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1160233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r10, lsl #8 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) 1161233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1162233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r8, r9, r10 ; s = vp8_signed_char_clamp(qs1 - u) 1163233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r10, r11, r10 ; s = vp8_signed_char_clamp(ps1 + u) 1164233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, lr ; *oq1 = s^0x80 1165233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, lr ; *op1 = s^0x80 1166233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1167233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r11, [src, #-5] ; load p2 for 1/7th difference across boundary 1168233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-4] ; store op1 1169233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-1] ; store oq1 1170233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r9, [src], pstep ; load q2 for 1/7th difference across boundary 1171233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1172233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1173233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1174233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1175233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r6, [src, #-5] 1176233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-4] 
1177233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-1] 1178233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r7, [src], pstep 1179233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1180233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1181233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1182233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r11, r11, r6, lsl #8 1183233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r9, r9, r7, lsl #8 1184233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1185233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r6, [src, #-5] 1186233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-4] 1187233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-1] 1188233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r7, [src], pstep 1189233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1190233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1191233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1192233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r11, r11, r6, lsl #16 1193233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r9, r9, r7, lsl #16 1194233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1195233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r6, [src, #-5] 1196233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src, #-4] 1197233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-1] 1198233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrb r7, [src], pstep 1199233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r11, r11, r6, lsl #24 1200233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r9, r9, r7, lsl #24 1201233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1202233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;roughly 1/7th difference across boundary 1203233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r9, r9, lr 1204233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r11, r11, lr 1205233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1206233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov lr, #0x9 ; 9 
1207233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0x3f ; 63 1208233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1209233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r6, r12 1210233d2500723e5594f3e7c70896ffeeef32b9c950ywan sxtb16 r10, r12, ror #8 1211233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r8, r6, lr, r7 1212233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r6, r6, lr, r7 1213233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlabb r12, r10, lr, r7 1214233d2500723e5594f3e7c70896ffeeef32b9c950ywan smlatb r10, r10, lr, r7 1215233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r8, #8, r8, asr #7 1216233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r6, #8, r6, asr #7 1217233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r12, #8, r12, asr #7 1218233d2500723e5594f3e7c70896ffeeef32b9c950ywan ssat r10, #8, r10, asr #7 1219233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1220233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, pstep, lsl #2 1221233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1222233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r6, r8, r6, lsl #16 1223233d2500723e5594f3e7c70896ffeeef32b9c950ywan pkhbt r10, r12, r10, lsl #16 1224233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1225233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r6, r6 1226233d2500723e5594f3e7c70896ffeeef32b9c950ywan uxtb16 r10, r10 1227233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1228233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldr lr, c0x80808080 1229233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1230233d2500723e5594f3e7c70896ffeeef32b9c950ywan orr r10, r6, r10, lsl #8 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) 1231233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1232233d2500723e5594f3e7c70896ffeeef32b9c950ywan qadd8 r8, r11, r10 ; s = vp8_signed_char_clamp(ps2 + u) 1233233d2500723e5594f3e7c70896ffeeef32b9c950ywan qsub8 r10, r9, r10 ; s = vp8_signed_char_clamp(qs2 - u) 1234233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r8, r8, lr ; *op2 = s^0x80 
1235233d2500723e5594f3e7c70896ffeeef32b9c950ywan eor r10, r10, lr ; *oq2 = s^0x80 1236233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1237233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-5] ; store *op2 1238233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src], pstep ; store *oq2 1239233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1240233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1241233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-5] 1242233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src], pstep 1243233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1244233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1245233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-5] 1246233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src], pstep 1247233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r8, r8, lsr #8 1248233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r10, r10, lsr #8 1249233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r8, [src, #-5] 1250233d2500723e5594f3e7c70896ffeeef32b9c950ywan strb r10, [src], pstep 1251233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1252233d2500723e5594f3e7c70896ffeeef32b9c950ywan ;adjust src pointer for next loop 1253233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, #2 1254233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1255233d2500723e5594f3e7c70896ffeeef32b9c950ywan|mbvskip_filter| 1256233d2500723e5594f3e7c70896ffeeef32b9c950ywan sub src, src, #4 1257233d2500723e5594f3e7c70896ffeeef32b9c950ywan subs count, count, #1 1258233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1259233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] ; preload for next block 1260233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne r6, [src], pstep ; load source data 1261233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] 1262233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne r7, [src], pstep 1263233d2500723e5594f3e7c70896ffeeef32b9c950ywan 
pld [src, #23] 1264233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne r8, [src], pstep 1265233d2500723e5594f3e7c70896ffeeef32b9c950ywan pld [src, #23] 1266233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldrne lr, [src], pstep 1267233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1268233d2500723e5594f3e7c70896ffeeef32b9c950ywan bne MBVnext8 1269233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1270233d2500723e5594f3e7c70896ffeeef32b9c950ywan add sp, sp, #16 1271233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1272233d2500723e5594f3e7c70896ffeeef32b9c950ywan ldmia sp!, {r4 - r11, pc} 1273233d2500723e5594f3e7c70896ffeeef32b9c950ywan ENDP ; |vp8_mbloop_filter_vertical_edge_armv6| 1274233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1275233d2500723e5594f3e7c70896ffeeef32b9c950ywan; Constant Pool 1276233d2500723e5594f3e7c70896ffeeef32b9c950ywanc0x80808080 DCD 0x80808080 1277233d2500723e5594f3e7c70896ffeeef32b9c950ywanc0x03030303 DCD 0x03030303 1278233d2500723e5594f3e7c70896ffeeef32b9c950ywanc0x04040404 DCD 0x04040404 1279233d2500723e5594f3e7c70896ffeeef32b9c950ywanc0x01010101 DCD 0x01010101 1280233d2500723e5594f3e7c70896ffeeef32b9c950ywanc0x7F7F7F7F DCD 0x7F7F7F7F 1281233d2500723e5594f3e7c70896ffeeef32b9c950ywan 1282233d2500723e5594f3e7c70896ffeeef32b9c950ywan END 1283