;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_loop_filter_simple_horizontal_edge_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vp8_loop_filter_simple_horizontal_edge_neon
;
; Simple loop filter across one horizontal edge, 16 pixels wide.
; Four 16-byte rows are read: p1 (s - 2*pitch), p0 (s - pitch), q0 (s) and
; q1 (s + pitch). Only the p0 and q0 rows are written back.
;
; Note: flimit, limit, and thresh should be positive numbers. All 16 elements
; in flimit are equal, so a single byte is loaded and broadcast to every lane.
; The same applies to limit. thresh is not read by this routine.
;
; Arguments:
;   r0      unsigned char *s             ; address of the q0 row
;   r1      int p                        ; pitch (byte distance between rows)
;   r2      const signed char *flimit
;   r3      const signed char *limit
;   stack   const signed char *thresh    ; not read here
;   stack   int count                    ; unused
;
; Registers:
;   r0, r3 and r12 are modified. q0-q3 and q8-q15 are overwritten.
;   q4-q7 (d8-d15) are used as work registers; they are callee-saved under
;   the AAPCS, so they are pushed on entry and popped before returning.
;-----------------------------------------------------------------------------
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
    vpush       {d8-d15}                    ; preserve callee-saved q4-q7 (64 bytes, keeps 8-byte stack alignment)

    sub         r0, r0, r1, lsl #1          ; r0 = s - 2 * pitch (address of the p1 row)

    adr         r12, lfhy_coeff             ; r12 -> constant table: 0x80, 0x03, 0x04
    vld1.u8     {q5}, [r0], r1              ; p1
    vld1.s8     {d2[], d3[]}, [r2]          ; flimit: one byte broadcast -> q1
    vld1.s8     {d26[], d27[]}, [r3]        ; limit: one byte broadcast -> q13
    vld1.u8     {q6}, [r0], r1              ; p0
    vld1.u8     {q0}, [r12]!                ; 0x80
    vld1.u8     {q7}, [r0], r1              ; q0
    vld1.u8     {q10}, [r12]!               ; 0x03
    vld1.u8     {q8}, [r0]                  ; q1 (r0 stays at s + pitch)

    ;vp8_filter_mask() function
    vabd.u8     q15, q6, q7                 ; abs(p0 - q0)
    vabd.u8     q14, q5, q8                 ; abs(p1 - q1)
    vqadd.u8    q15, q15, q15               ; abs(p0 - q0) * 2 (saturating)
    vshr.u8     q14, q14, #1                ; abs(p1 - q1) / 2
    vqadd.u8    q15, q15, q14               ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2

    ;vp8_filter() function
    veor        q7, q7, q0                  ; qs0: q0 ^ 0x80, convert to a signed value
    veor        q6, q6, q0                  ; ps0: p0 ^ 0x80, convert to a signed value
    veor        q5, q5, q0                  ; ps1: p1 ^ 0x80, convert to a signed value
    veor        q8, q8, q0                  ; qs1: q1 ^ 0x80, convert to a signed value

    vadd.u8     q1, q1, q1                  ; flimit * 2
    vadd.u8     q1, q1, q13                 ; flimit * 2 + limit
    vcge.u8     q15, q1, q15                ; mask = 0xFF where abs(p0 - q0)*2 + abs(p1 - q1)/2 <= flimit*2 + limit, else 0

    ; 3 * (qs0 - ps0) does not fit in 8 bits, so the difference is widened to
    ; 16 bits, accumulated there, and saturated back to 8 bits only once.
    vsubl.s8    q2, d14, d12                ; (qs0 - ps0), low 8 lanes, 16-bit
    vsubl.s8    q3, d15, d13                ; (qs0 - ps0), high 8 lanes, 16-bit

    vqsub.s8    q4, q5, q8                  ; q4: vp8_filter = vp8_signed_char_clamp(ps1 - qs1)

    vadd.s16    q11, q2, q2                 ; 2 * (qs0 - ps0), low lanes
    vadd.s16    q12, q3, q3                 ; 2 * (qs0 - ps0), high lanes

    vld1.u8     {q9}, [r12]!                ; 0x04

    vadd.s16    q2, q2, q11                 ; 3 * (qs0 - ps0), low lanes
    vadd.s16    q3, q3, q12                 ; 3 * (qs0 - ps0), high lanes

    vaddw.s8    q2, q2, d8                  ; vp8_filter + 3 * (qs0 - ps0), low lanes
    vaddw.s8    q3, q3, d9                  ; vp8_filter + 3 * (qs0 - ps0), high lanes

    vqmovn.s16  d8, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0))
    vqmovn.s16  d9, q3

    vand        q4, q4, q15                 ; vp8_filter &= mask

    vqadd.s8    q2, q4, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter + 3)
    vqadd.s8    q4, q4, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter + 4)
    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
    vshr.s8     q4, q4, #3                  ; Filter1 >>= 3

    sub         r0, r0, r1, lsl #1          ; r0 = (s + pitch) - 2 * pitch = address of the p0 row

    ;calculate output
    vqadd.s8    q11, q6, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)
    vqsub.s8    q10, q7, q4                 ; u = vp8_signed_char_clamp(qs0 - Filter1)

    add         r3, r0, r1                  ; r3 = address of the q0 row (limit pointer no longer needed)

    veor        q6, q11, q0                 ; *op0 = u ^ 0x80
    veor        q7, q10, q0                 ; *oq0 = u ^ 0x80

    vst1.u8     {q6}, [r0]                  ; store op0
    vst1.u8     {q7}, [r3]                  ; store oq0

    vpop        {d8-d15}                    ; restore callee-saved q4-q7
    bx          lr
    ENDP        ; |vp8_loop_filter_simple_horizontal_edge_neon|

;-----------------
; Constant table read sequentially through r12: three 16-byte vectors.

lfhy_coeff
    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404

    END