;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_sub_pixel_variance8x8_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------------
; vp8_sub_pixel_variance8x8_neon
;
; Builds an 8x8 block from the source with a two-pass 2-tap (bilinear) filter
; and compares it against the 8x8 block at dst_ptr.
;
; Arguments:
;   r0         unsigned char *src_ptr
;   r1         int            src_pixels_per_line
;   r2         int            xoffset   (selects the first-pass tap pair;
;                                        0 skips the first pass)
;   r3         int            yoffset   (selects the second-pass tap pair;
;                                        0 skips the second pass)
;   stack(r4)  unsigned char *dst_ptr
;   stack(r5)  int            dst_pixels_per_line
;   stack(lr)  unsigned int  *sse
;
; Results:
;   *sse = sum of squared differences over the 64 pixels
;   r0   = sse - ((sum * sum) >> 6), where sum is the sum of the differences
;
; Register usage:
;   r12        address of bilinear_taps_coeff, later the row-group loop counter
;   d22-d30    the (up to) nine intermediate 8-pixel rows
;   q8         running sum of differences, q9/q10 running squared differences
;   d8-d15     used as scratch; saved and restored here because they are
;              callee-saved under the ARM procedure call standard
;
; The first pass loads 16 bytes per source row (q-register loads) even though
; only 9 of them contribute to the output.
;
;note: most of the code is copied from bilinear_predict8x8_neon and vp8_variance8x8_neon.
;-----------------------------------------------------------------------------

|vp8_sub_pixel_variance8x8_neon| PROC
    push            {r4-r5, lr}
    vpush           {d8-d15}                ;q4-q7 are clobbered below

    adr             r12, bilinear_taps_coeff
    ;12 bytes (push) + 64 bytes (vpush) sit between sp and the stack arguments
    ldr             r4, [sp, #76]           ;load *dst_ptr from stack
    ldr             r5, [sp, #80]           ;load dst_pixels_per_line from stack
    ldr             lr, [sp, #84]           ;load *sse from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (9x8)
    add             r2, r12, r2, lsl #3     ;each table entry is 2 words = 8 bytes

    vld1.u8         {q1}, [r0], r1          ;load src data (rows 0-3)
    vld1.u32        {d31}, [r2]             ;load first_pass filter
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1):
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]              ;byte 0 / byte 4 of the tap pair
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1

    vld1.u8         {q1}, [r0], r1          ;load src data (rows 4-7)
    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vld1.u8         {q2}, [r0], r1
    vqrshrn.u16     d23, q7, #7
    vld1.u8         {q3}, [r0], r1
    vqrshrn.u16     d24, q8, #7
    vld1.u8         {q4}, [r0], r1
    vqrshrn.u16     d25, q9, #7

    ;first_pass filtering on the rest 5-line data
    vld1.u8         {q5}, [r0], r1          ;row 8, needed by the second pass

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * Filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * Filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d26, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d27, q7, #7
    vqrshrn.u16     d28, q8, #7
    vqrshrn.u16     d29, q9, #7
    vqrshrn.u16     d30, q10, #7

;Second pass: 8x8
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    ;skip_secondpass_filter
    beq             sub_pixel_variance8x8_neon

    add             r3, r12, r3, lsl #3     ;each table entry is 2 words = 8 bytes

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * Filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * Filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1
    vmlal.u8        q5, d27, d1
    vmlal.u8        q6, d28, d1
    vmlal.u8        q7, d29, d1
    vmlal.u8        q8, d30, d1

    vqrshrn.u16     d22, q1, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d23, q2, #7
    vqrshrn.u16     d24, q3, #7
    vqrshrn.u16     d25, q4, #7
    vqrshrn.u16     d26, q5, #7
    vqrshrn.u16     d27, q6, #7
    vqrshrn.u16     d28, q7, #7
    vqrshrn.u16     d29, q8, #7

    b               sub_pixel_variance8x8_neon

;--------------------
;xoffset == 0: take the nine source rows unfiltered (8 bytes per row)
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1
    vld1.u8         {d27}, [r0], r1
    vld1.u8         {d28}, [r0], r1
    vld1.u8         {d29}, [r0], r1
    vld1.u8         {d30}, [r0], r1

    b               secondpass_filter

;----------------------
;vp8_variance8x8_neon
sub_pixel_variance8x8_neon
    vmov.i8         q8, #0                  ;q8 - sum
    vmov.i8         q9, #0                  ;q9, q10 - sse
    vmov.i8         q10, #0

    mov             r12, #2                 ;two groups of four rows

sub_pixel_variance8x8_neon_loop
    vld1.8          {d0}, [r4], r5          ;load dst data
    subs            r12, r12, #1            ;flags consumed by the bne below
    vld1.8          {d1}, [r4], r5
    vld1.8          {d2}, [r4], r5
    vsubl.u8        q4, d22, d0             ;calculate diff
    vld1.8          {d3}, [r4], r5

    vsubl.u8        q5, d23, d1
    vsubl.u8        q6, d24, d2

    vpadal.s16      q8, q4                  ;sum
    vmlal.s16       q9, d8, d8              ;sse
    vmlal.s16       q10, d9, d9

    vsubl.u8        q7, d25, d3

    vpadal.s16      q8, q5
    vmlal.s16       q9, d10, d10
    vmlal.s16       q10, d11, d11

    vmov            q11, q13                ;rows 4-5 -> d22/d23 for next pass

    vpadal.s16      q8, q6
    vmlal.s16       q9, d12, d12
    vmlal.s16       q10, d13, d13

    vmov            q12, q14                ;rows 6-7 -> d24/d25 for next pass

    vpadal.s16      q8, q7
    vmlal.s16       q9, d14, d14
    vmlal.s16       q10, d15, d15

    bne             sub_pixel_variance8x8_neon_loop

    vadd.u32        q10, q9, q10            ;accumulate sse
    vpaddl.s32      q0, q8                  ;accumulate sum

    vpaddl.u32      q1, q10
    vadd.s64        d0, d0, d1              ;d0 = sum
    vadd.u64        d1, d2, d3              ;d1 = sse

    vmull.s32       q5, d0, d0              ;sum * sum
    vst1.32         {d1[0]}, [lr]           ;store sse
    vshr.s32        d10, d10, #6            ;(sum * sum) >> 6, 64 pixels
    vsub.s32        d0, d1, d10             ;sse - ((sum * sum) >> 6)

    vmov.32         r0, d0[0]               ;return
    vpop            {d8-d15}
    pop             {r4-r5, pc}

    ENDP

;-----------------
;Bilinear tap pairs, two 32-bit words per entry, indexed by xoffset/yoffset.
;Each pair sums to 128, matching the rounding shift by 7 used above.

bilinear_taps_coeff
    DCD     128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

    END