;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_sub_pixel_variance16x16_neon_func|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------
; vp8_sub_pixel_variance16x16_neon_func
;
; Bilinear-filters a 16x16 block of src_ptr at the sub-pixel position
; (xoffset, yoffset), then computes the variance of the filtered block
; against the 16x16 block at dst_ptr.
;
; r0    unsigned char  *src_ptr,
; r1    int  src_pixels_per_line,
; r2    int  xoffset,
; r3    int  yoffset,
; stack(r4) unsigned char *dst_ptr,
; stack(r5) int dst_pixels_per_line,
; stack(r6) unsigned int *sse
;
; Returns (r0): sse - ((sum * sum) >> 8), where sum and sse are the sum and
; the sum of squares of (filtered - dst) over the 256 pixels. The sum of
; squares is also written to *sse.
;
; xoffset/yoffset select an 8-byte (two 32-bit taps) entry of
; bilinear_taps_coeff; the table has 8 entries, so offsets are expected
; to be in the range 0..7.
;
; Source footprint: the horizontal filter loads 24 bytes per row, and 17 rows
; are read whenever the vertical filter runs (the xoffset == 0 path reads
; 16 bytes per row).
;
; Stack usage: every path ends up with 528 bytes of scratch below the saved
; registers so that the common epilogue can release it with one add:
;   both passes      : 272 bytes (17x16 first-pass output) + 256 bytes
;                      (16x16 second-pass output)
;   single-pass paths: 528 bytes reserved, only the first 256 are written
;
; d8-d15 are callee-saved under the AAPCS and are used as scratch here, so
; they are saved on entry and restored on exit.
;
;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.

|vp8_sub_pixel_variance16x16_neon_func| PROC
    push            {r4-r6, lr}
    vpush           {d8-d15}                ;AAPCS: d8-d15 must be preserved

    adr             r12, bilinear_taps_coeff
    ;stack arguments sit above the 16 bytes of core registers and the
    ;64 bytes of NEON registers pushed above
    ldr             r4, [sp, #80]           ;load *dst_ptr from stack
    ldr             r5, [sp, #84]           ;load dst_pixels_per_line from stack
    ldr             r6, [sp, #88]           ;load *sse from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_bfilter16x16_only

    add             r2, r12, r2, lsl #3     ;calculate filter location

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {d31}, [r2]             ;load first_pass filter (does not touch the flags)

    beq             firstpass_bfilter16x16_only

    sub             sp, sp, #272            ;reserve space on stack for temporary storage
    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    mov             lr, sp                  ;lr - write pointer into the first-pass buffer
    vld1.u8         {d5, d6, d7}, [r0], r1

    mov             r2, #3                  ;loop counter (3 x 4 rows; the last 5 rows are done below)
    vld1.u8         {d8, d9, d10}, [r0], r1

    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {d11, d12, d13}, [r0], r1

    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (17x16)
vp8e_filt_blk2d_fp16x16_loop_neon
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1              ;flags consumed by the bne at the end of the loop

    vqrshrn.u16    d14, q7, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d15, q8, #7
    vqrshrn.u16    d16, q9, #7
    vqrshrn.u16    d17, q10, #7
    vqrshrn.u16    d18, q11, #7
    vqrshrn.u16    d19, q12, #7
    vqrshrn.u16    d20, q13, #7

    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    vqrshrn.u16    d21, q14, #7
    vld1.u8         {d5, d6, d7}, [r0], r1

    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store result
    vld1.u8         {d8, d9, d10}, [r0], r1
    vst1.u8         {d18, d19, d20, d21}, [lr]!
    vld1.u8         {d11, d12, d13}, [r0], r1

    bne             vp8e_filt_blk2d_fp16x16_loop_neon

;First-pass filtering for rest 5 lines
;(4 rows are already in d2-d13 from the last loop iteration; row 17 is loaded here)
    vld1.u8         {d14, d15, d16}, [r0], r1

    vmull.u8        q9, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q10, d3, d0
    vmull.u8        q11, d5, d0
    vmull.u8        q12, d6, d0
    vmull.u8        q13, d8, d0
    vmull.u8        q14, d9, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1

    vmlal.u8        q9, d2, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q11, d5, d1
    vmlal.u8        q13, d8, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1

    vmlal.u8        q10, d3, d1             ;(src_ptr[1] * Filter[1])
    vmlal.u8        q12, d6, d1
    vmlal.u8        q14, d9, d1

    vmull.u8        q1, d11, d0
    vmull.u8        q2, d12, d0
    vmull.u8        q3, d14, d0
    vmull.u8        q4, d15, d0

    vext.8          d11, d11, d12, #1       ;construct src_ptr[1]
    vext.8          d14, d14, d15, #1

    vmlal.u8        q1, d11, d1             ;(src_ptr[1] * Filter[1])
    vmlal.u8        q3, d14, d1

    vext.8          d12, d12, d13, #1
    vext.8          d15, d15, d16, #1

    vmlal.u8        q2, d12, d1             ;(src_ptr[1] * Filter[1])
    vmlal.u8        q4, d15, d1

    vqrshrn.u16    d10, q9, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d11, q10, #7
    vqrshrn.u16    d12, q11, #7
    vqrshrn.u16    d13, q12, #7
    vqrshrn.u16    d14, q13, #7
    vqrshrn.u16    d15, q14, #7
    vqrshrn.u16    d16, q1, #7
    vqrshrn.u16    d17, q2, #7
    vqrshrn.u16    d18, q3, #7
    vqrshrn.u16    d19, q4, #7

    vst1.u8         {d10, d11, d12, d13}, [lr]!         ;store result
    vst1.u8         {d14, d15, d16, d17}, [lr]!
    vst1.u8         {d18, d19}, [lr]!

;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    sub             lr, lr, #272            ;rewind lr to the start of the first-pass buffer

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    sub             sp, sp, #256            ;second-pass output buffer (272 + 256 = 528 in total)
    mov             r3, sp                  ;r3 - write pointer into the second-pass buffer

    vld1.u8         {d22, d23}, [lr]!       ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]
    mov             r12, #4                 ;loop counter (4 rows per iteration)

vp8e_filt_blk2d_sp16x16_loop_neon
    vld1.u8         {d24, d25}, [lr]!
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * Filter[0])
    vld1.u8         {d26, d27}, [lr]!
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [lr]!
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [lr]!

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * Filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    subs            r12, r12, #1

    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
    vqrshrn.u16    d3, q2, #7
    vqrshrn.u16    d4, q3, #7
    vqrshrn.u16    d5, q4, #7
    vqrshrn.u16    d6, q5, #7
    vqrshrn.u16    d7, q6, #7
    vqrshrn.u16    d8, q7, #7
    vqrshrn.u16    d9, q8, #7

    vst1.u8         {d2, d3}, [r3]!         ;store result
    vst1.u8         {d4, d5}, [r3]!
    vst1.u8         {d6, d7}, [r3]!
    vmov            q11, q15                ;last row loaded becomes the first row of the next iteration
    vst1.u8         {d8, d9}, [r3]!

    bne             vp8e_filt_blk2d_sp16x16_loop_neon

    b               sub_pixel_variance16x16_neon

;--------------------
;yoffset == 0: horizontal filter only, output written straight to the scratch buffer
firstpass_bfilter16x16_only
    mov             r2, #4                      ;loop counter
    sub             sp, sp, #528            ;reserve space on stack for temporary storage
    vdup.8          d0, d31[0]                  ;first_pass filter (d0 d1)
    vdup.8          d1, d31[4]
    mov             r3, sp

;First Pass: output_height lines x output_width columns (16x16)
vp8e_filt_blk2d_fpo16x16_loop_neon
    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    vld1.u8         {d5, d6, d7}, [r0], r1
    vld1.u8         {d8, d9, d10}, [r0], r1
    vld1.u8         {d11, d12, d13}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1

    vqrshrn.u16    d14, q7, #7              ;shift/round/saturate to u8
    vqrshrn.u16    d15, q8, #7
    vqrshrn.u16    d16, q9, #7
    vqrshrn.u16    d17, q10, #7
    vqrshrn.u16    d18, q11, #7
    vqrshrn.u16    d19, q12, #7
    vqrshrn.u16    d20, q13, #7
    vst1.u8         {d14, d15}, [r3]!       ;store result
    vqrshrn.u16    d21, q14, #7

    vst1.u8         {d16, d17}, [r3]!
    vst1.u8         {d18, d19}, [r3]!
    vst1.u8         {d20, d21}, [r3]!

    bne             vp8e_filt_blk2d_fpo16x16_loop_neon

    b               sub_pixel_variance16x16_neon

;---------------------
;xoffset == 0: vertical filter only, reading rows directly from src_ptr
secondpass_bfilter16x16_only
;Second pass: 16x16
;secondpass_filter
    sub             sp, sp, #528            ;reserve space on stack for temporary storage
    add             r3, r12, r3, lsl #3
    mov             r12, #4                     ;loop counter
    vld1.u32        {d31}, [r3]                 ;load second_pass filter
    vld1.u8         {d22, d23}, [r0], r1        ;load src data
    mov             r3, sp

    vdup.8          d0, d31[0]                  ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

vp8e_filt_blk2d_spo16x16_loop_neon
    vld1.u8         {d24, d25}, [r0], r1
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * Filter[0])
    vld1.u8         {d26, d27}, [r0], r1
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [r0], r1
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [r0], r1

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * Filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    vqrshrn.u16    d2, q1, #7               ;shift/round/saturate to u8
    vqrshrn.u16    d3, q2, #7
    vqrshrn.u16    d4, q3, #7
    vqrshrn.u16    d5, q4, #7
    vqrshrn.u16    d6, q5, #7
    vqrshrn.u16    d7, q6, #7
    vqrshrn.u16    d8, q7, #7
    vqrshrn.u16    d9, q8, #7

    vst1.u8         {d2, d3}, [r3]!         ;store result
    subs            r12, r12, #1
    vst1.u8         {d4, d5}, [r3]!
    vmov            q11, q15                ;last row loaded becomes the first row of the next iteration
    vst1.u8         {d6, d7}, [r3]!
    vst1.u8         {d8, d9}, [r3]!

    bne             vp8e_filt_blk2d_spo16x16_loop_neon

    b               sub_pixel_variance16x16_neon

;----------------------------
;variance16x16
;On entry r3 points just past the 256 filtered bytes, r4/r5 are dst_ptr and
;its stride, r6 is the sse output pointer.
sub_pixel_variance16x16_neon
    vmov.i8         q8, #0                      ;q8 - sum
    vmov.i8         q9, #0                      ;q9, q10 - sse
    vmov.i8         q10, #0

    sub             r3, r3, #256                ;rewind to the start of the filtered block
    mov             r12, #8                     ;8 iterations x 2 rows

sub_pixel_variance16x16_neon_loop
    vld1.8          {q0}, [r3]!                 ;Load up source and reference
    vld1.8          {q2}, [r4], r5
    vld1.8          {q1}, [r3]!
    vld1.8          {q3}, [r4], r5

    vsubl.u8        q11, d0, d4                 ;diff
    vsubl.u8        q12, d1, d5
    vsubl.u8        q13, d2, d6
    vsubl.u8        q14, d3, d7

    vpadal.s16      q8, q11                     ;sum
    vmlal.s16       q9, d22, d22                ;sse
    vmlal.s16       q10, d23, d23

    subs            r12, r12, #1

    vpadal.s16      q8, q12
    vmlal.s16       q9, d24, d24
    vmlal.s16       q10, d25, d25
    vpadal.s16      q8, q13
    vmlal.s16       q9, d26, d26
    vmlal.s16       q10, d27, d27
    vpadal.s16      q8, q14
    vmlal.s16       q9, d28, d28
    vmlal.s16       q10, d29, d29

    bne             sub_pixel_variance16x16_neon_loop

    vadd.u32        q10, q9, q10                ;accumulate sse
    vpaddl.s32      q0, q8                      ;accumulate sum

    vpaddl.u32      q1, q10
    vadd.s64        d0, d0, d1                  ;d0 - sum
    vadd.u64        d1, d2, d3                  ;d1 - sse

    vmull.s32       q5, d0, d0                  ;d10 - sum * sum
    vst1.32         {d1[0]}, [r6]               ;store sse
    ;sum * sum can reach 65280^2, which does not fit in a signed 32-bit lane,
    ;so the shift must be logical rather than arithmetic
    vshr.u32        d10, d10, #8
    vsub.u32        d0, d1, d10

    add             sp, sp, #528                ;release the scratch buffers
    vmov.32         r0, d0[0]                   ;return

    vpop            {d8-d15}
    pop             {r4-r6,pc}

    ENDP

;-----------------

;Bilinear filter taps: one (Filter[0], Filter[1]) pair of 32-bit words per
;sub-pixel offset; each pair sums to 128.
bilinear_taps_coeff
    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

    END