;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_bilinear_predict16x16_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vp8_bilinear_predict16x16_neon
;
; Produces a 16x16 block of bytes from src_ptr using two 2-tap filters whose
; taps come from bifilter16_coeff:
;   first pass  (horizontal): out = (p[x]*f0 + p[x+1]*f1 + 64) >> 7, saturated
;   second pass (vertical)  : out = (p[y]*f0 + p[y+1]*f1 + 64) >> 7, saturated
; The first pass is skipped when xoffset == 0 and the second pass is skipped
; when yoffset == 0.  When both run, the first pass writes 17 rows of 16
; bytes (272 bytes) to a temporary buffer on the stack.
;
; Arguments:
;   r0        unsigned char *src_ptr
;   r1        int            src_pixels_per_line
;   r2        int            xoffset   (indexes 8-byte entries of the table,
;   r3        int            yoffset    which holds 8 entries: 0..7)
;   stack[0]  unsigned char *dst_ptr   (loaded into r4)
;   stack[1]  int            dst_pitch (loaded into r5)
;
; The horizontal pass reads 24 bytes per source row; the vertical pass needs
; 17 source rows.
;
; Register preservation: r4, r5 and d8-d15 are saved on entry and restored on
; every exit path.  d8-d15 (q4-q7) are callee-saved under the AAPCS and are
; used as scratch by all three code paths below.
;-----------------------------------------------------------------------------
|vp8_bilinear_predict16x16_neon| PROC
    push            {r4-r5, lr}
    vpush           {d8-d15}                ;callee-saved NEON registers

    adr             r12, bifilter16_coeff
    ;12 bytes (r4, r5, lr) + 64 bytes (d8-d15) now sit below the stack args
    ldr             r4, [sp, #76]           ;dst_ptr
    ldr             r5, [sp, #80]           ;dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_bfilter16x16_only

    add             r2, r12, r2, lsl #3     ;calculate filter location

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {d31}, [r2]             ;load first_pass filter

    beq             firstpass_bfilter16x16_only

    sub             sp, sp, #272            ;reserve space on stack for temporary storage
    vld1.u8         {d2, d3, d4}, [r0], r1  ;load src data
    mov             lr, sp
    vld1.u8         {d5, d6, d7}, [r0], r1

    mov             r2, #3                  ;loop counter
    vld1.u8         {d8, d9, d10}, [r0], r1

    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {d11, d12, d13}, [r0], r1

    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (17x16)
;Each iteration filters 4 rows and preloads the next 4; 3 iterations = 12 rows.
filt_blk2d_fp16x16_loop_neon
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1

    vqrshrn.u16     d14, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d15, q8, #7
    vqrshrn.u16     d16, q9, #7
    vqrshrn.u16     d17, q10, #7
    vqrshrn.u16     d18, q11, #7
    vqrshrn.u16     d19, q12, #7
    vqrshrn.u16     d20, q13, #7

    vld1.u8         {d2, d3, d4}, [r0], r1  ;load src data
    vqrshrn.u16     d21, q14, #7
    vld1.u8         {d5, d6, d7}, [r0], r1

    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store result
    vld1.u8         {d8, d9, d10}, [r0], r1
    vst1.u8         {d18, d19, d20, d21}, [lr]!
    vld1.u8         {d11, d12, d13}, [r0], r1

    bne             filt_blk2d_fp16x16_loop_neon

;First-pass filtering for the remaining 5 lines
;(4 rows already loaded by the last loop iteration + 1 row loaded here)
    vld1.u8         {d14, d15, d16}, [r0], r1

    vmull.u8        q9, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q10, d3, d0
    vmull.u8        q11, d5, d0
    vmull.u8        q12, d6, d0
    vmull.u8        q13, d8, d0
    vmull.u8        q14, d9, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1

    vmlal.u8        q9, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q11, d5, d1
    vmlal.u8        q13, d8, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1

    vmlal.u8        q10, d3, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q12, d6, d1
    vmlal.u8        q14, d9, d1

    vmull.u8        q1, d11, d0
    vmull.u8        q2, d12, d0
    vmull.u8        q3, d14, d0
    vmull.u8        q4, d15, d0

    vext.8          d11, d11, d12, #1       ;construct src_ptr[1]
    vext.8          d14, d14, d15, #1

    vmlal.u8        q1, d11, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q3, d14, d1

    vext.8          d12, d12, d13, #1
    vext.8          d15, d15, d16, #1

    vmlal.u8        q2, d12, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q4, d15, d1

    vqrshrn.u16     d10, q9, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d11, q10, #7
    vqrshrn.u16     d12, q11, #7
    vqrshrn.u16     d13, q12, #7
    vqrshrn.u16     d14, q13, #7
    vqrshrn.u16     d15, q14, #7
    vqrshrn.u16     d16, q1, #7
    vqrshrn.u16     d17, q2, #7
    vqrshrn.u16     d18, q3, #7
    vqrshrn.u16     d19, q4, #7

    vst1.u8         {d10, d11, d12, d13}, [lr]!     ;store result
    vst1.u8         {d14, d15, d16, d17}, [lr]!
    vst1.u8         {d18, d19}, [lr]!

;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    sub             lr, lr, #272            ;rewind to the start of the temporary buffer

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vld1.u8         {d22, d23}, [lr]!       ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]
    mov             r12, #4                 ;loop counter

;Each iteration consumes rows n..n+4 and emits 4 output rows.
filt_blk2d_sp16x16_loop_neon
    vld1.u8         {d24, d25}, [lr]!
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [lr]!
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [lr]!
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [lr]!

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    subs            r12, r12, #1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    vst1.u8         {d4, d5}, [r4], r5
    vst1.u8         {d6, d7}, [r4], r5
    vmov            q11, q15                ;last loaded row becomes the next first row
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_sp16x16_loop_neon

    add             sp, sp, #272            ;release temporary buffer

    vpop            {d8-d15}
    pop             {r4-r5,pc}

;--------------------
;yoffset == 0: horizontal filter only, results go straight to dst_ptr
firstpass_bfilter16x16_only
    mov             r2, #4                  ;loop counter
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (16x16)
filt_blk2d_fpo16x16_loop_neon
    vld1.u8         {d2, d3, d4}, [r0], r1  ;load src data
    vld1.u8         {d5, d6, d7}, [r0], r1
    vld1.u8         {d8, d9, d10}, [r0], r1
    vld1.u8         {d11, d12, d13}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1

    vqrshrn.u16     d14, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d15, q8, #7
    vqrshrn.u16     d16, q9, #7
    vqrshrn.u16     d17, q10, #7
    vqrshrn.u16     d18, q11, #7
    vqrshrn.u16     d19, q12, #7
    vqrshrn.u16     d20, q13, #7
    vst1.u8         {d14, d15}, [r4], r5    ;store result
    vqrshrn.u16     d21, q14, #7

    vst1.u8         {d16, d17}, [r4], r5
    vst1.u8         {d18, d19}, [r4], r5
    vst1.u8         {d20, d21}, [r4], r5

    bne             filt_blk2d_fpo16x16_loop_neon

    vpop            {d8-d15}
    pop             {r4-r5,pc}

;---------------------
;xoffset == 0: vertical filter only, rows are read directly from src_ptr
secondpass_bfilter16x16_only
;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    mov             r12, #4                 ;loop counter
    vld1.u32        {d31}, [r3]             ;load second_pass filter
    vld1.u8         {d22, d23}, [r0], r1    ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

filt_blk2d_spo16x16_loop_neon
    vld1.u8         {d24, d25}, [r0], r1
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [r0], r1
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [r0], r1
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [r0], r1

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    subs            r12, r12, #1
    vst1.u8         {d4, d5}, [r4], r5
    vmov            q11, q15                ;last loaded row becomes the next first row
    vst1.u8         {d6, d7}, [r4], r5
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_spo16x16_loop_neon

    vpop            {d8-d15}
    pop             {r4-r5,pc}

    ENDP

;-----------------
;Filter taps as (f0, f1) pairs of 32-bit words, one pair per offset 0..7;
;each pair sums to 128.  Reached via ADR, so it must stay close to the code.
bifilter16_coeff
    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

    END