;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_sixtap_predict_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

; Six-tap filter coefficients, one row per fractional offset (eight rows).
; Each row is six signed 32-bit taps followed by two words of zero padding,
; i.e. 32 bytes per row, which is why the code indexes it with "lsl #5".
; Every row sums to 128, matching the ">> 7" applied after filtering.
; Taps 1 and 4 are the only non-positive ones; the code multiplies by the
; absolute values and applies the sign by choosing vmlal or vmlsl.
filter4_coeff
    DCD     0,   0, 128,   0,   0,  0,  0,  0
    DCD     0,  -6, 123,  12,  -1,  0,  0,  0
    DCD     2, -11, 108,  36,  -8,  1,  0,  0
    DCD     0,  -9,  93,  50,  -6,  0,  0,  0
    DCD     3, -16,  77,  77, -16,  3,  0,  0
    DCD     0,  -6,  50,  93,  -9,  0,  0,  0
    DCD     1,  -8,  36, 108, -11,  2,  0,  0
    DCD     0,  -1,  12, 123,  -6,  0,  0,  0

;-----------------
; vp8_sixtap_predict_neon
;
; Six-tap prediction of a 4x4 block: an optional horizontal pass selected by
; xoffset followed by an optional vertical pass selected by yoffset.  Four
; rows of four bytes are written to dst_ptr, dst_pitch bytes apart.
;
; r0    unsigned char  *src_ptr,
; r1    int  src_pixels_per_line,
; r2    int  xoffset,               (row index into filter4_coeff)
; r3    int  yoffset,               (row index into filter4_coeff)
; stack(r4) unsigned char *dst_ptr,
; stack(lr) int  dst_pitch
;
; Paths:
;   xoffset == 0                -> secondpass_filter4x4_only (vertical only)
;   xoffset != 0, yoffset == 0  -> firstpass_filter4x4_only  (horizontal only)
;   otherwise                   -> horizontal pass over 9 rows, then vertical
;
; Register use: r12 = filter table base, r4 = dst_ptr, lr = dst_pitch.
; NEON q0-q2 hold the six taps broadcast as bytes (d0-d5); q3-q15 are scratch.
; q4-q7 (d8-d15) are callee-saved under the AAPCS and are used as scratch
; here, so they are saved on entry and restored before every return.

|vp8_sixtap_predict_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;preserve callee-saved NEON regs

    adr             r12, filter4_coeff
    ;8 bytes (r4, lr) + 64 bytes (d8-d15) were pushed above, so the
    ;caller's stack arguments now start at sp + 72.
    ldr             r4, [sp, #72]           ;load parameters from stack
    ldr             lr, [sp, #76]           ;load parameters from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_filter4x4_only

    add             r2, r12, r2, lsl #5     ;calculate filter location

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter

    beq             firstpass_filter4x4_only

    vabs.s32        q12, q14                ;get abs(filter_parameters)
    vabs.s32        q13, q15

    sub             r0, r0, #2              ;go back 2 columns of src data
    sub             r0, r0, r1, lsl #1      ;go back 2 lines of src data

;First pass: output_height lines x output_width columns (9x4)
    vld1.u8         {q3}, [r0], r1          ;load first 4-line src data
    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
    vld1.u8         {q4}, [r0], r1
    vdup.8          d1, d24[4]
    vld1.u8         {q5}, [r0], r1
    vdup.8          d2, d25[0]
    vld1.u8         {q6}, [r0], r1
    vdup.8          d3, d25[4]
    vdup.8          d4, d26[0]
    vdup.8          d5, d26[4]

    pld             [r0]                    ;preload hints for the next rows
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vext.8          d18, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d19, d8, d9, #5
    vext.8          d20, d10, d11, #5
    vext.8          d21, d12, d13, #5

    vswp            d7, d8                  ;discard 2nd half data after src_ptr[3] is done
    vswp            d11, d12

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[3])
    vzip.32         d20, d21
    vmull.u8        q7, d18, d5             ;(src_ptr[3] * vp8_filter[5])
    vmull.u8        q8, d20, d5

    vmov            q4, q3                  ;keep original src data in q4 q6
    vmov            q6, q5

    vzip.32         d6, d7                  ;construct src_ptr[-2], and put 2-line data together
    vzip.32         d10, d11
    vshr.u64        q9, q4, #8              ;construct src_ptr[-1]
    vshr.u64        q10, q6, #8
    vmlal.u8        q7, d6, d0              ;+(src_ptr[-2] * vp8_filter[0])
    vmlal.u8        q8, d10, d0

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[-1])
    vzip.32         d20, d21
    vshr.u64        q3, q4, #32             ;construct src_ptr[2]
    vshr.u64        q5, q6, #32
    vmlsl.u8        q7, d18, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d20, d1

    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[2])
    vzip.32         d10, d11
    vshr.u64        q9, q4, #16             ;construct src_ptr[0]
    vshr.u64        q10, q6, #16
    vmlsl.u8        q7, d6, d4              ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d10, d4

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[0])
    vzip.32         d20, d21
    vshr.u64        q3, q4, #24             ;construct src_ptr[1]
    vshr.u64        q5, q6, #24
    vmlal.u8        q7, d18, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d20, d2

    ;The tap-3 product is kept in its own accumulator and merged below
    ;with a saturating add.
    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[1])
    vzip.32         d10, d11
    vmull.u8        q9, d6, d3              ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q10, d10, d3

    vld1.u8         {q3}, [r0], r1          ;load rest 5-line src data
    vld1.u8         {q4}, [r0], r1

    vqadd.s16       q7, q9                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q10

    vld1.u8         {q5}, [r0], r1
    vld1.u8         {q6}, [r0], r1

    vqrshrun.s16    d27, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d28, q8, #7             ;d27 = rows 0-1, d28 = rows 2-3

    ;First Pass on rest 5-line data
    ;Rows 4-7 are processed two per register as above; the ninth row is
    ;held alone in q11 (d22/d23) and accumulated in q12.
    vld1.u8         {q11}, [r0], r1

    vext.8          d18, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d19, d8, d9, #5
    vext.8          d20, d10, d11, #5
    vext.8          d21, d12, d13, #5

    vswp            d7, d8                  ;discard 2nd half data after src_ptr[3] is done
    vswp            d11, d12

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[3])
    vzip.32         d20, d21
    vext.8          d31, d22, d23, #5       ;construct src_ptr[3]
    vmull.u8        q7, d18, d5             ;(src_ptr[3] * vp8_filter[5])
    vmull.u8        q8, d20, d5
    vmull.u8        q12, d31, d5            ;(src_ptr[3] * vp8_filter[5])

    vmov            q4, q3                  ;keep original src data in q4 q6
    vmov            q6, q5

    vzip.32         d6, d7                  ;construct src_ptr[-2], and put 2-line data together
    vzip.32         d10, d11
    vshr.u64        q9, q4, #8              ;construct src_ptr[-1]
    vshr.u64        q10, q6, #8

    vmlal.u8        q7, d6, d0              ;+(src_ptr[-2] * vp8_filter[0])
    vmlal.u8        q8, d10, d0
    vmlal.u8        q12, d22, d0            ;(src_ptr[-2] * vp8_filter[0])

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[-1])
    vzip.32         d20, d21
    vshr.u64        q3, q4, #32             ;construct src_ptr[2]
    vshr.u64        q5, q6, #32
    vext.8          d31, d22, d23, #1       ;construct src_ptr[-1]

    vmlsl.u8        q7, d18, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d20, d1
    vmlsl.u8        q12, d31, d1            ;-(src_ptr[-1] * vp8_filter[1])

    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[2])
    vzip.32         d10, d11
    vshr.u64        q9, q4, #16             ;construct src_ptr[0]
    vshr.u64        q10, q6, #16
    vext.8          d31, d22, d23, #4       ;construct src_ptr[2]

    vmlsl.u8        q7, d6, d4              ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d10, d4
    vmlsl.u8        q12, d31, d4            ;-(src_ptr[2] * vp8_filter[4])

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[0])
    vzip.32         d20, d21
    vshr.u64        q3, q4, #24             ;construct src_ptr[1]
    vshr.u64        q5, q6, #24
    vext.8          d31, d22, d23, #2       ;construct src_ptr[0]

    vmlal.u8        q7, d18, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d20, d2
    vmlal.u8        q12, d31, d2            ;(src_ptr[0] * vp8_filter[2])

    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[1])
    vzip.32         d10, d11
    vext.8          d31, d22, d23, #3       ;construct src_ptr[1]
    vmull.u8        q9, d6, d3              ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q10, d10, d3
    vmull.u8        q11, d31, d3            ;(src_ptr[1] * vp8_filter[3])

    add             r3, r12, r3, lsl #5     ;calculate second_pass filter location

    vqadd.s16       q7, q9                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q10
    vqadd.s16       q12, q11

    vext.8          d23, d27, d28, #4       ;d23 = rows 1-2
    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter

    vqrshrun.s16    d29, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d30, q8, #7             ;d29 = rows 4-5, d30 = rows 6-7
    vqrshrun.s16    d31, q12, #7            ;d31 = row 8

;Second pass: 4x4
;Inputs are the nine first-pass rows: d27..d31 hold row pairs starting on
;even rows, d23..d26 hold row pairs starting on odd rows.
    vabs.s32        q7, q5                  ;get abs(filter_parameters)
    vabs.s32        q8, q6

    vext.8          d24, d28, d29, #4       ;d24 = rows 3-4
    vext.8          d25, d29, d30, #4       ;d25 = rows 5-6
    vext.8          d26, d30, d31, #4       ;d26 = rows 7-8

    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
    vdup.8          d1, d14[4]
    vdup.8          d2, d15[0]
    vdup.8          d3, d15[4]
    vdup.8          d4, d16[0]
    vdup.8          d5, d16[4]

    vmull.u8        q3, d27, d0             ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q4, d28, d0

    vmull.u8        q5, d25, d5             ;(src_ptr[3] * vp8_filter[5])
    vmull.u8        q6, d26, d5

    vmlsl.u8        q3, d29, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q4, d30, d4

    vmlsl.u8        q5, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q6, d24, d1

    vmlal.u8        q3, d28, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q4, d29, d2

    vmlal.u8        q5, d24, d3             ;(src_ptr[1] * vp8_filter[3])
    vmlal.u8        q6, d25, d3

    add             r0, r4, lr              ;r0/r1/r2 = dst rows 1/2/3
    add             r1, r0, lr
    add             r2, r1, lr

    vqadd.s16       q5, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q6, q4

    vqrshrun.s16    d3, q5, #7              ;shift/round/saturate to u8
    vqrshrun.s16    d4, q6, #7

    vst1.32         {d3[0]}, [r4]           ;store result
    vst1.32         {d3[1]}, [r0]
    vst1.32         {d4[0]}, [r1]
    vst1.32         {d4[1]}, [r2]

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4, pc}


;---------------------
;Horizontal filter only (yoffset == 0).  q14/q15 already hold the
;first_pass filter row loaded before the branch.
firstpass_filter4x4_only
    vabs.s32        q12, q14                ;get abs(filter_parameters)
    vabs.s32        q13, q15

    sub             r0, r0, #2              ;go back 2 columns of src data

;First pass: output_height lines x output_width columns (4x4)
    vld1.u8         {q3}, [r0], r1          ;load first 4-line src data
    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
    vld1.u8         {q4}, [r0], r1
    vdup.8          d1, d24[4]
    vld1.u8         {q5}, [r0], r1
    vdup.8          d2, d25[0]
    vld1.u8         {q6}, [r0], r1

    vdup.8          d3, d25[4]
    vdup.8          d4, d26[0]
    vdup.8          d5, d26[4]

    vext.8          d18, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d19, d8, d9, #5
    vext.8          d20, d10, d11, #5
    vext.8          d21, d12, d13, #5

    vswp            d7, d8                  ;discard 2nd half data after src_ptr[3] is done
    vswp            d11, d12

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[3])
    vzip.32         d20, d21
    vmull.u8        q7, d18, d5             ;(src_ptr[3] * vp8_filter[5])
    vmull.u8        q8, d20, d5

    vmov            q4, q3                  ;keep original src data in q4 q6
    vmov            q6, q5

    vzip.32         d6, d7                  ;construct src_ptr[-2], and put 2-line data together
    vzip.32         d10, d11
    vshr.u64        q9, q4, #8              ;construct src_ptr[-1]
    vshr.u64        q10, q6, #8
    vmlal.u8        q7, d6, d0              ;+(src_ptr[-2] * vp8_filter[0])
    vmlal.u8        q8, d10, d0

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[-1])
    vzip.32         d20, d21
    vshr.u64        q3, q4, #32             ;construct src_ptr[2]
    vshr.u64        q5, q6, #32
    vmlsl.u8        q7, d18, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d20, d1

    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[2])
    vzip.32         d10, d11
    vshr.u64        q9, q4, #16             ;construct src_ptr[0]
    vshr.u64        q10, q6, #16
    vmlsl.u8        q7, d6, d4              ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d10, d4

    vzip.32         d18, d19                ;put 2-line data in 1 register (src_ptr[0])
    vzip.32         d20, d21
    vshr.u64        q3, q4, #24             ;construct src_ptr[1]
    vshr.u64        q5, q6, #24
    vmlal.u8        q7, d18, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d20, d2

    vzip.32         d6, d7                  ;put 2-line data in 1 register (src_ptr[1])
    vzip.32         d10, d11
    vmull.u8        q9, d6, d3              ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q10, d10, d3

    add             r0, r4, lr              ;r0/r1/r2 = dst rows 1/2/3
    add             r1, r0, lr
    add             r2, r1, lr

    vqadd.s16       q7, q9                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q10

    vqrshrun.s16    d27, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d28, q8, #7

    vst1.32         {d27[0]}, [r4]          ;store result
    vst1.32         {d27[1]}, [r0]
    vst1.32         {d28[0]}, [r1]
    vst1.32         {d28[1]}, [r2]

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4, pc}


;---------------------
;Vertical filter only (xoffset == 0).  Nine source rows of four bytes each,
;starting two rows above src_ptr, are loaded lane by lane into d27..d31.
secondpass_filter4x4_only
    sub             r0, r0, r1, lsl #1      ;go back 2 lines of src data
    add             r3, r12, r3, lsl #5     ;calculate filter location

    vld1.32         {d27[0]}, [r0], r1      ;load src data
    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
    vld1.32         {d27[1]}, [r0], r1
    vabs.s32        q7, q5                  ;get abs(filter_parameters)
    vld1.32         {d28[0]}, [r0], r1
    vabs.s32        q8, q6
    vld1.32         {d28[1]}, [r0], r1
    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
    vld1.32         {d29[0]}, [r0], r1
    vdup.8          d1, d14[4]
    vld1.32         {d29[1]}, [r0], r1
    vdup.8          d2, d15[0]
    vld1.32         {d30[0]}, [r0], r1
    vdup.8          d3, d15[4]
    vld1.32         {d30[1]}, [r0], r1
    vdup.8          d4, d16[0]
    vld1.32         {d31[0]}, [r0], r1
    vdup.8          d5, d16[4]

    vext.8          d23, d27, d28, #4       ;d23 = rows 1-2
    vext.8          d24, d28, d29, #4       ;d24 = rows 3-4
    vext.8          d25, d29, d30, #4       ;d25 = rows 5-6
    vext.8          d26, d30, d31, #4       ;d26 = rows 7-8

    vmull.u8        q3, d27, d0             ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q4, d28, d0

    vmull.u8        q5, d25, d5             ;(src_ptr[3] * vp8_filter[5])
    vmull.u8        q6, d26, d5

    vmlsl.u8        q3, d29, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q4, d30, d4

    vmlsl.u8        q5, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q6, d24, d1

    vmlal.u8        q3, d28, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q4, d29, d2

    vmlal.u8        q5, d24, d3             ;(src_ptr[1] * vp8_filter[3])
    vmlal.u8        q6, d25, d3

    add             r0, r4, lr              ;r0/r1/r2 = dst rows 1/2/3
    add             r1, r0, lr
    add             r2, r1, lr

    vqadd.s16       q5, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q6, q4

    vqrshrun.s16    d3, q5, #7              ;shift/round/saturate to u8
    vqrshrun.s16    d4, q6, #7

    vst1.32         {d3[0]}, [r4]           ;store result
    vst1.32         {d3[1]}, [r0]
    vst1.32         {d4[0]}, [r1]
    vst1.32         {d4[1]}, [r2]

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4, pc}

    ENDP

    END