;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8_sixtap_predict8x8_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
; r3 int yoffset,
; stack(r4) unsigned char *dst_ptr,
; stack(r5) int dst_pitch

2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber|vp8_sixtap_predict8x8_neon| PROC 2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber push {r4-r5, lr} 2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28d35fe0269d77984b383b6bdc051f26b72da15277Ard Biesheuvel adrl r12, filter8_coeff 2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ldr r4, [sp, #12] ;load parameters from stack 3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ldr r5, [sp, #16] ;load parameters from stack 3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp r2, #0 ;skip first_pass filter if xoffset=0 3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq secondpass_filter8x8_only 3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r2, r12, r2, lsl #5 ;calculate filter location 3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cmp r3, #0 ;skip second_pass filter if yoffset=0 3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.s32 {q14, q15}, [r2] ;load first_pass filter 4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq firstpass_filter8x8_only 4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub sp, sp, #64 ;reserve space on stack for temporary storage 4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov lr, sp 4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q12, q14 4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q13, q15 4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov r2, #2 
;loop counter 5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) 5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub r0, r0, r1, lsl #1 5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d0, d24[0] ;first_pass filter (d0-d5) 5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d1, d24[4] 5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d2, d25[0] 5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First pass: output_height lines x output_width columns (13x8) 5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q3}, [r0], r1 ;load src data 6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d3, d25[4] 6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q4}, [r0], r1 6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d4, d26[0] 6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q5}, [r0], r1 6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d5, d26[4] 6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q6}, [r0], r1 6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_fp8x8_loop_neon 6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pld [r0] 6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pld [r0, r1] 7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pld [r0, r1, lsl #1] 7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q8, d8, d0 7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q9, d10, d0 7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q10, d12, d0 
7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] 7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #1 7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #1 8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #1 8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1]) 8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q8, d29, d1 8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q9, d30, d1 8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q10, d31, d1 8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #4 ;construct src_ptr[2] 8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #4 8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #4 9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #4 9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4]) 9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q8, d29, d4 9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q9, d30, d4 9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q10, d31, d4 9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #2 ;construct src_ptr[0] 9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #2 9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #2 10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #2 
10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2]) 10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q8, d29, d2 10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q9, d30, d2 10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q10, d31, d2 10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #5 ;construct src_ptr[3] 10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #5 10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #5 11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #5 11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5]) 11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q8, d29, d5 11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q9, d30, d5 11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q10, d31, d5 11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #3 ;construct src_ptr[1] 11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #3 11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #3 12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #3 12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3]) 12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q4, d29, d3 12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q5, d30, d3 12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q6, 
d31, d3 12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber subs r2, r2, #1 12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q8, q4 13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q9, q5 13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q10, q6 13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q3}, [r0], r1 ;load src data 13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d23, q8, #7 13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d24, q9, #7 13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d25, q10, #7 14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d22}, [lr]! ;store result 14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q4}, [r0], r1 14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d23}, [lr]! 14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q5}, [r0], r1 14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d24}, [lr]! 14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q6}, [r0], r1 14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d25}, [lr]! 
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber bne filt_blk2d_fp8x8_loop_neon 15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;first_pass filtering on the rest 5-line data 15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;vld1.u8 {q3}, [r0], r1 ;load src data 15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;vld1.u8 {q4}, [r0], r1 15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;vld1.u8 {q5}, [r0], r1 15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;vld1.u8 {q6}, [r0], r1 15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q7}, [r0], r1 15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q9, d8, d0 16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q10, d10, d0 16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q11, d12, d0 16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q12, d14, d0 16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d27, d6, d7, #1 ;construct src_ptr[-1] 16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d8, d9, #1 16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d10, d11, #1 16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d12, d13, #1 16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d14, d15, #1 16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1]) 17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q9, d28, d1 17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 
q10, d29, d1 17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q11, d30, d1 17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q12, d31, d1 17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d27, d6, d7, #4 ;construct src_ptr[2] 17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d8, d9, #4 17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d10, d11, #4 17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d12, d13, #4 18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d14, d15, #4 18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4]) 18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q9, d28, d4 18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q10, d29, d4 18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q11, d30, d4 18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q12, d31, d4 18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d27, d6, d7, #2 ;construct src_ptr[0] 18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d8, d9, #2 19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d10, d11, #2 19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d12, d13, #2 19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d14, d15, #2 19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2]) 19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q9, d28, d2 19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q10, d29, d2 19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber vmlal.u8 q11, d30, d2 19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q12, d31, d2 19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d27, d6, d7, #5 ;construct src_ptr[3] 20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d8, d9, #5 20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d10, d11, #5 20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d12, d13, #5 20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d14, d15, #5 20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5]) 20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q9, d28, d5 20890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q10, d29, d5 20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q11, d30, d5 21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q12, d31, d5 21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d27, d6, d7, #3 ;construct src_ptr[1] 21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d8, d9, #3 21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d10, d11, #3 21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d12, d13, #3 21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d14, d15, #3 21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3]) 21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q4, d28, d3 22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q5, d29, d3 22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q6, d30, d3 
22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q7, d31, d3 22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters) 22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q9, q4 22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q10, q5 22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q11, q6 22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q12, q7 22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r3, r12, r3, lsl #5 23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8 23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub lr, lr, #64 23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d27, q9, #7 23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q9}, [lr]! ;load intermediate data from stack 23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d28, q10, #7 23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q10}, [lr]! 23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.s32 {q5, q6}, [r3] ;load second_pass filter 24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d29, q11, #7 24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q11}, [lr]! 
24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q7, q5 24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q8, q6 24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d30, q12, #7 24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q12}, [lr]! 24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 8x8 25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov r3, #2 ;loop counter 25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) 25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d1, d14[4] 25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d2, d15[0] 25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d3, d15[4] 25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d4, d16[0] 25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d5, d16[4] 25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_sp8x8_loop_neon 26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0]) 26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q4, d19, d0 26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q5, d20, d0 26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q6, d21, d0 26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1]) 26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q4, d20, d1 26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q5, d21, d1 
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q6, d22, d1 27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4]) 27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q4, d23, d4 27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q5, d24, d4 27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q6, d25, d4 27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2]) 27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q4, d21, d2 27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q5, d22, d2 27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q6, d23, d2 28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5]) 28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q4, d24, d5 28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q5, d25, d5 28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q6, d26, d5 28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3]) 28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q8, d22, d3 28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q9, d23, d3 28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q10, d24, d3 29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber subs r3, r3, #1 29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 
29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q8, q4 29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q9, q5 29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q10, q6 29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d7, q8, #7 30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d8, q9, #7 30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d9, q10, #7 30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q9, q11 30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d6}, [r4], r5 ;store result 30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q10, q12 30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d7}, [r4], r5 30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q11, q13 30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d8}, [r4], r5 30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q12, q14 31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d9}, [r4], r5 31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov d26, d30 31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber bne filt_blk2d_sp8x8_loop_neon 31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add sp, sp, #64 31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop {r4-r5,pc} 31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;--------------------- 31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfirstpass_filter8x8_only 32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
;add r2, r12, r2, lsl #5 ;calculate filter location 32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;vld1.s32 {q14, q15}, [r2] ;load first_pass filter 32290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q12, q14 32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q13, q15 32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov r2, #2 ;loop counter 32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2) 32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d0, d24[0] ;first_pass filter (d0-d5) 32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d1, d24[4] 33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d2, d25[0] 33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d3, d25[4] 33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d4, d26[0] 33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d5, d26[4] 33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;First pass: output_height lines x output_width columns (8x8) 33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_fpo8x8_loop_neon 33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q3}, [r0], r1 ;load src data 33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q4}, [r0], r1 33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q5}, [r0], r1 34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {q6}, [r0], r1 34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pld [r0] 34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pld [r0, r1] 34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pld [r0, r1, lsl #1] 
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0]) 34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q8, d8, d0 34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q9, d10, d0 34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q10, d12, d0 35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #1 ;construct src_ptr[-1] 35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #1 35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #1 35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #1 35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1]) 35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q8, d29, d1 35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q9, d30, d1 35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q10, d31, d1 36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #4 ;construct src_ptr[2] 36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #4 36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #4 36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #4 36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4]) 36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q8, d29, d4 36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q9, d30, d4 36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 
q10, d31, d4 37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #2 ;construct src_ptr[0] 37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #2 37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #2 37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #2 37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2]) 37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q8, d29, d2 37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q9, d30, d2 37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q10, d31, d2 38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #5 ;construct src_ptr[3] 38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #5 38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #5 38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d31, d12, d13, #5 38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5]) 38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q8, d29, d5 38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q9, d30, d5 38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q10, d31, d5 39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d28, d6, d7, #3 ;construct src_ptr[1] 39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d29, d8, d9, #3 39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vext.8 d30, d10, d11, #3 39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
vext.8 d31, d12, d13, #3 39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3]) 39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q4, d29, d3 39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q5, d30, d3 39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q6, d31, d3 40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ; 40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q8, q4 40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q9, q5 40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q10, q6 40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber subs r2, r2, #1 40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8 40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d23, q8, #7 41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d24, q9, #7 41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d25, q10, #7 41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d22}, [r4], r5 ;store result 41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d23}, [r4], r5 41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d24}, [r4], r5 41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d25}, [r4], r5 41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber bne filt_blk2d_fpo8x8_loop_neon 41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop {r4-r5,pc} 42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;--------------------- 42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecondpass_filter8x8_only 42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber sub r0, r0, r1, lsl #1 42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r3, r12, r3, lsl #5 42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d18}, [r0], r1 ;load src data 42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.s32 {q5, q6}, [r3] ;load second_pass filter 42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d19}, [r0], r1 43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q7, q5 43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d20}, [r0], r1 43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vabs.s32 q8, q6 43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d21}, [r0], r1 43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mov r3, #2 ;loop counter 43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d22}, [r0], r1 43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5) 43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d23}, [r0], r1 43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d1, d14[4] 43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d24}, [r0], r1 44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d2, d15[0] 44190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d25}, [r0], r1 44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d3, d15[4] 44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d26}, [r0], r1 44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d4, d16[0] 
44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d27}, [r0], r1 44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vdup.8 d5, d16[4] 44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d28}, [r0], r1 44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d29}, [r0], r1 44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vld1.u8 {d30}, [r0], r1 45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;Second pass: 8x8 45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilt_blk2d_spo8x8_loop_neon 45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0]) 45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q4, d19, d0 45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q5, d20, d0 45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q6, d21, d0 45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1]) 45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q4, d20, d1 46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q5, d21, d1 46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q6, d22, d1 46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4]) 46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q4, d23, d4 46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q5, d24, d4 46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlsl.u8 q6, d25, d4 46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2]) 46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q4, d21, 
d2 47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q5, d22, d2 47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q6, d23, d2 47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5]) 47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q4, d24, d5 47590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q5, d25, d5 47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmlal.u8 q6, d26, d5 47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3]) 47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q8, d22, d3 48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q9, d23, d3 48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmull.u8 q10, d24, d3 48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber subs r3, r3, #1 48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters) 48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q8, q4 48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q9, q5 48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqadd.s16 q10, q6 48990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8 49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d7, q8, #7 49290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d8, q9, #7 49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vqrshrun.s16 d9, q10, #7 49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q9, q11 49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d6}, [r4], r5 ;store result 49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q10, q12 49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d7}, [r4], r5 49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q11, q13 50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d8}, [r4], r5 50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov q12, q14 50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vst1.u8 {d9}, [r4], r5 50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmov d26, d30 50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber bne filt_blk2d_spo8x8_loop_neon 50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber pop {r4-r5,pc} 50890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ENDP 51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;----------------- 51279f15823c34ae1e423108295e416213200bb280fAndreas Huber 51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfilter8_coeff 51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 0, 0, 128, 0, 0, 0, 0, 0 51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 0, -6, 123, 12, -1, 0, 0, 0 51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 2, -11, 108, 36, -8, 1, 0, 0 51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 0, -9, 93, 50, -6, 0, 0, 0 51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 3, -16, 77, 77, -16, 3, 0, 0 51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 0, -6, 50, 93, -9, 0, 0, 0 52090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 1, -8, 36, 108, -11, 2, 0, 0 
52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber DCD 0, -1, 12, 123, -6, 0, 0, 0 52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber END 524