;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_bilinear_predict8x4_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vp8_bilinear_predict8x4_neon
;
; Two-pass bilinear prediction producing an 8-wide x 4-high block.
; Pass 1 filters five source rows horizontally, pass 2 filters the five
; intermediate rows vertically into four output rows. Either pass is skipped
; when its offset is zero.
;
; r0        unsigned char  *src_ptr,
; r1        int  src_pixels_per_line,
; r2        int  xoffset,
; r3        int  yoffset,
; stack[0]  unsigned char *dst_ptr,   (loaded into r4)
; stack[1]  int  dst_pitch            (loaded into lr)
;
; Each filter entry in bifilter8x4_coeff is two 32-bit words (8 bytes), which
; is why the offsets are scaled by 8 (lsl #3) when indexing the table.
;
; Register use: q4-q7 (d8-d15) are used as scratch. AAPCS requires d8-d15 to
; be preserved across calls, so they are saved on entry and restored on every
; exit path.
;-----------------------------------------------------------------------------
|vp8_bilinear_predict8x4_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;callee-saved NEON regs clobbered below

    adr             r12, bifilter8x4_coeff
    ;stack args now sit above 8 bytes (r4, lr) + 64 bytes (d8-d15)
    ldr             r4, [sp, #72]           ;dst_ptr
    ldr             lr, [sp, #76]           ;dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (5x8)
    add             r2, r12, r2, lsl #3     ;calculate filter location

    vld1.u8         {q1}, [r0], r1          ;load src data (16 bytes per row)
    vld1.u32        {d31}, [r2]             ;load first_pass filter
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {q5}, [r0], r1
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d23, q7, #7
    vqrshrn.u16     d24, q8, #7
    vqrshrn.u16     d25, q9, #7
    vqrshrn.u16     d26, q10, #7

;Second pass: 4x8
;On entry d22-d26 hold the five 8-pixel rows to be filtered vertically.
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3     ;calculate filter location
    add             r0, r4, lr              ;r0 = dst row 1

    vld1.u32        {d31}, [r3]             ;load second_pass filter
    add             r1, r0, lr              ;r1 = dst row 2

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1

    add             r2, r1, lr              ;r2 = dst row 3

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7

    vst1.u8         {d2}, [r4]              ;store result
    vst1.u8         {d3}, [r0]
    vst1.u8         {d4}, [r1]
    vst1.u8         {d5}, [r2]

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4, pc}

;--------------------
;xoffset == 0: no horizontal filtering, use the five source rows as-is.
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1

    b               secondpass_filter

;---------------------
;yoffset == 0: no vertical filtering, store the first four rows directly.
skip_secondpass_filter
    vst1.u8         {d22}, [r4], lr         ;store result
    vst1.u8         {d23}, [r4], lr
    vst1.u8         {d24}, [r4], lr
    vst1.u8         {d25}, [r4], lr

    vpop            {d8-d15}                ;restore callee-saved NEON regs
    pop             {r4, pc}

    ENDP

;-----------------
;Bilinear filter taps, one {filter[0], filter[1]} pair of words per offset.

bifilter8x4_coeff
    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

    END