;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_loop_filter_simple_vertical_edge_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vp8_loop_filter_simple_vertical_edge_neon
;
; Applies the "simple" loop filter across one vertical edge, 16 rows at a time.
; For each of the 16 rows the routine reads the 4 bytes starting at s - 2
; (p1, p0, q0, q1) and writes back the 2 bytes starting at s - 1 (p0, q0).
;
; Note: flimit, limit, and thresh should be positive numbers. All 16 elements
; in flimit are equal, so only one (replicated) load is needed for flimit.
; The same applies to limit.
;
; Arguments:
;   r0          unsigned char *s            pointer to the first q0 pixel
;   r1          int p                       pitch (byte stride between rows)
;   r2          const signed char *flimit
;   r3          const signed char *limit
;   stack       const signed char *thresh   not read by this routine
;   stack       int count                   not read by this routine
;
; Registers:
;   r0, r2, r3, r12 and q0-q3, q10-q15 are used as scratch.
;   q4-q7 (d8-d15) are also used as working registers; they are callee-saved
;   under the ARM procedure call standard, so they are pushed on entry and
;   popped before returning.
;-----------------------------------------------------------------------------
|vp8_loop_filter_simple_vertical_edge_neon| PROC
    vpush       {d8-d15}                    ; preserve callee-saved q4-q7

    sub         r0, r0, #2                  ; move src pointer back by 2 columns

    ; Rows 0-7: each vld4 lane load reads 4 consecutive bytes of one row and
    ; scatters them to lane i of d6/d7/d8/d9 (one register per column).
    ; Constant/parameter loads are interleaved with the pixel loads.
    vld4.8      {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
    vld1.s8     {d2[], d3[]}, [r2]          ; flimit replicated -> q1
    vld1.s8     {d26[], d27[]}, [r3]        ; limit replicated -> q13
    vld4.8      {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
    adr         r12, vlfy_coeff             ; r12 -> constant table below
    vld4.8      {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
    vld4.8      {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
    vld4.8      {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
    vld4.8      {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
    vld4.8      {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
    vld4.8      {d6[7], d7[7], d8[7], d9[7]}, [r0], r1

    ; Rows 8-15 go to d10/d11/d12/d13.
    vld4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
    vld1.u8     {q0}, [r12]!                ; q0  = 0x80 in every byte
    vld4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
    vld1.u8     {q11}, [r12]!               ; q11 = 0x03 in every byte
    vld4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
    vld1.u8     {q12}, [r12]!               ; q12 = 0x04 in every byte
    vld4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
    vld4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
    vld4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
    vld4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
    vld4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1

    ; Regroup the column halves so that each q register holds one full
    ; 16-row column:  p1 = q3, p0 = q5, q0 = q4, q1 = q6
    vswp        d7, d10
    vswp        d12, d9

    ; Filter mask
    sub         r0, r0, r1, lsl #4          ; rewind src pointer by 16 rows
    vabd.u8     q15, q5, q4                 ; abs(p0 - q0)
    vabd.u8     q14, q3, q6                 ; abs(p1 - q1)
    vqadd.u8    q15, q15, q15               ; abs(p0 - q0) * 2
    vshr.u8     q14, q14, #1                ; abs(p1 - q1) / 2
    vqadd.u8    q15, q15, q14               ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2

    veor        q4, q4, q0                  ; qs0: q0 offset to convert to a signed value
    veor        q5, q5, q0                  ; ps0: p0 offset to convert to a signed value
    veor        q3, q3, q0                  ; ps1: p1 offset to convert to a signed value
    veor        q6, q6, q0                  ; qs1: q1 offset to convert to a signed value

    vadd.u8     q1, q1, q1                  ; flimit * 2 (non-saturating add)
    vadd.u8     q1, q1, q13                 ; flimit * 2 + limit
    ; mask (q15) = 0xFF where
    ;   flimit * 2 + limit >= abs(p0 - q0) * 2 + abs(p1 - q1) / 2, else 0x00
    vcge.u8     q15, q1, q15

    ; Filter value.  (qs0 - ps0) and its multiple of 3 are computed in 16-bit
    ; lanes so that only the final narrowing step saturates to signed 8 bits.
    vsubl.s8    q2, d8, d10                 ; (qs0 - ps0), rows 0-7
    vsubl.s8    q13, d9, d11                ; (qs0 - ps0), rows 8-15

    vqsub.s8    q1, q3, q6                  ; vp8_filter = vp8_signed_char_clamp(ps1 - qs1)

    vadd.s16    q10, q2, q2                 ; 2 * (qs0 - ps0)
    vadd.s16    q14, q13, q13
    vadd.s16    q2, q2, q10                 ; 3 * (qs0 - ps0)
    vadd.s16    q13, q13, q14

    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * (qs0 - ps0)
    vaddw.s8    q13, q13, d3

    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0))
    vqmovn.s16  d3, q13

    add         r0, r0, #1                  ; r0 -> p0 column of row 0 (store address)
    add         r2, r0, r1                  ; r2 -> p0 column of row 1

    vand        q1, q1, q15                 ; vp8_filter &= mask

    vqadd.s8    q2, q1, q11                 ; Filter2 = vp8_signed_char_clamp(vp8_filter + 3)
    vqadd.s8    q1, q1, q12                 ; Filter1 = vp8_signed_char_clamp(vp8_filter + 4)
    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3

    ; Calculate output
    vqsub.s8    q10, q4, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
    vqadd.s8    q11, q5, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)

    veor        q7, q10, q0                 ; *oq0 = u ^ 0x80
    veor        q6, q11, q0                 ; *op0 = u ^ 0x80

    add         r3, r2, r1                  ; r3  -> row 2
    ; After the swap: d12/d13 = op0/oq0 for rows 0-7,
    ;                 d14/d15 = op0/oq0 for rows 8-15
    vswp        d13, d14
    add         r12, r3, r1                 ; r12 -> row 3

    ; Store op0, oq0: each vst2 lane store writes the (op0, oq0) byte pair of
    ; one row.  r0, r2, r3 and r12 are rotated as row pointers.
    vst2.8      {d12[0], d13[0]}, [r0]
    vst2.8      {d12[1], d13[1]}, [r2]
    vst2.8      {d12[2], d13[2]}, [r3]
    vst2.8      {d12[3], d13[3]}, [r12], r1
    add         r0, r12, r1
    vst2.8      {d12[4], d13[4]}, [r12]
    vst2.8      {d12[5], d13[5]}, [r0], r1
    add         r2, r0, r1
    vst2.8      {d12[6], d13[6]}, [r0]
    vst2.8      {d12[7], d13[7]}, [r2], r1
    add         r3, r2, r1
    vst2.8      {d14[0], d15[0]}, [r2]
    vst2.8      {d14[1], d15[1]}, [r3], r1
    add         r12, r3, r1
    vst2.8      {d14[2], d15[2]}, [r3]
    vst2.8      {d14[3], d15[3]}, [r12], r1
    add         r0, r12, r1
    vst2.8      {d14[4], d15[4]}, [r12]
    vst2.8      {d14[5], d15[5]}, [r0], r1
    add         r2, r0, r1
    vst2.8      {d14[6], d15[6]}, [r0]
    vst2.8      {d14[7], d15[7]}, [r2]

    vpop        {d8-d15}                    ; restore callee-saved q4-q7
    bx          lr
    ENDP        ; |vp8_loop_filter_simple_vertical_edge_neon|

;-----------------
; Constants loaded sequentially through r12 into q0, q11 and q12.

vlfy_coeff
    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404

    END