1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <arm_neon.h> 12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <assert.h> 13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_config.h" 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h" 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx/vpx_integer.h" 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_ports/mem.h" 18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic INLINE int32x4_t MULTIPLY_BY_Q0( 20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc0, 21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc1, 22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc2, 23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc3, 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc4, 25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc5, 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc6, 27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t dsrc7, 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x8_t q0s16) { 29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32x4_t qdst; 30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t d0s16, d1s16; 31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0s16 = vget_low_s16(q0s16); 33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d1s16 = vget_high_s16(q0s16); 34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmull_lane_s16(dsrc0, d0s16, 0); 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc1, d0s16, 1); 37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc2, d0s16, 2); 38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc3, d0s16, 3); 39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc4, d1s16, 0); 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc5, d1s16, 1); 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc6, d1s16, 2); 42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian qdst = vmlal_lane_s16(qdst, dsrc7, d1s16, 3); 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian return qdst; 44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve8_horiz_neon( 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8_t *src, 48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ptrdiff_t src_stride, 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, 50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ptrdiff_t dst_stride, 51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int16_t *filter_x, 52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int x_step_q4, 53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int16_t *filter_y, // unused 54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int y_step_q4, // unused 55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int w, 56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int h) { 57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int width; 58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8_t *s, *psrc; 59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *d, *pdst; 60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8x8_t d2u8, d3u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8; 61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32x2_t d2u32, d3u32, d28u32, d29u32, d30u32, d31u32; 62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8x16_t q12u8, q13u8, q14u8, q15u8; 63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d22s16, d23s16; 64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t d24s16, d25s16, d26s16, d27s16; 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint16x4_t d2u16, d3u16, d4u16, d5u16, d16u16, d17u16, d18u16, d19u16; 66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x8_t q0s16; 67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; 68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32x4_t q1s32, q2s32, q14s32, q15s32; 69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint16x8x2_t q0x2u16; 70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8x8x2_t d0x2u8, d1x2u8; 71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32x2x2_t d0x2u32; 72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint16x4x2_t d0x2u16, d1x2u16; 73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32x4x2_t q0x2u32; 74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(x_step_q4 == 16); 76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q0s16 = vld1q_s16(filter_x); 78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; // adjust for taps 80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (; h > 0; h -= 4, 81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride * 4, 82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride * 4) { // loop_horiz_v 83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s = src; 84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24u8 = vld1_u8(s); 85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d25u8 = vld1_u8(s); 87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d26u8 = vld1_u8(s); 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d27u8 = vld1_u8(s); 91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q12u8 = vcombine_u8(d24u8, d25u8); 93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q13u8 = vcombine_u8(d26u8, d27u8); 94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q0x2u16 = vtrnq_u16(vreinterpretq_u16_u8(q12u8), 96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpretq_u16_u8(q13u8)); 97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[0])); 98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d25u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[0])); 99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d26u8 = vreinterpret_u8_u16(vget_low_u16(q0x2u16.val[1])); 100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d27u8 = vreinterpret_u8_u16(vget_high_u16(q0x2u16.val[1])); 101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0x2u8 = vtrn_u8(d24u8, d25u8); 102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d1x2u8 = vtrn_u8(d26u8, d27u8); 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(src + src_stride * 4); 105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(src + src_stride * 5); 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(src + src_stride * 6); 107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q8u16 = vmovl_u8(d0x2u8.val[0]); 109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q9u16 = vmovl_u8(d0x2u8.val[1]); 110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q10u16 = vmovl_u8(d1x2u8.val[0]); 111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q11u16 = vmovl_u8(d1x2u8.val[1]); 112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d16u16 = vget_low_u16(q8u16); 114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d17u16 = vget_high_u16(q8u16); 115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18u16 = vget_low_u16(q9u16); 116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d19u16 = vget_high_u16(q9u16); 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q8u16 = vcombine_u16(d16u16, d18u16); // vswp 17 18 118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q9u16 = vcombine_u16(d17u16, d19u16); 119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); 121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d23s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); // vmov 23 21 122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (width = w, psrc = src + 7, pdst = dst; 123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian width > 0; 124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian width -= 4, psrc += 4, pdst += 4) { // loop_horiz 125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s = psrc; 126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d28u32 = vld1_dup_u32((const uint32_t *)s); 127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d29u32 = vld1_dup_u32((const uint32_t *)s); 129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d31u32 = vld1_dup_u32((const uint32_t *)s); 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d30u32 = vld1_dup_u32((const uint32_t *)s); 133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(psrc + 64); 135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0x2u16 = vtrn_u16(vreinterpret_u16_u32(d28u32), 137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u16_u32(d31u32)); 138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d1x2u16 = vtrn_u16(vreinterpret_u16_u32(d29u32), 139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u16_u32(d30u32)); 140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[0]), // d28 141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u8_u16(d1x2u16.val[0])); // d29 142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d1x2u8 = vtrn_u8(vreinterpret_u8_u16(d0x2u16.val[1]), // d31 143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u8_u16(d1x2u16.val[1])); // d30 144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(psrc + 64 + src_stride); 146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q14u8 = vcombine_u8(d0x2u8.val[0], d0x2u8.val[1]); 148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q15u8 = vcombine_u8(d1x2u8.val[1], d1x2u8.val[0]); 149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q0x2u32 = vtrnq_u32(vreinterpretq_u32_u8(q14u8), 150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpretq_u32_u8(q15u8)); 151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d28u8 = vreinterpret_u8_u32(vget_low_u32(q0x2u32.val[0])); 153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d29u8 = vreinterpret_u8_u32(vget_high_u32(q0x2u32.val[0])); 154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q12u16 = vmovl_u8(d28u8); 155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q13u16 = vmovl_u8(d29u8); 156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(psrc + 64 + src_stride * 2); 158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); 160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); 161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); 162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); 163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); 164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); 165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); 166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); 167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); 168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d20s16, d22s16, 170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18s16, d19s16, d23s16, d24s16, q0s16); 171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q2s32 = MULTIPLY_BY_Q0(d17s16, d20s16, d22s16, d18s16, 172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d19s16, d23s16, d24s16, d26s16, q0s16); 173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q14s32 = MULTIPLY_BY_Q0(d20s16, d22s16, d18s16, d19s16, 174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d23s16, d24s16, d26s16, d27s16, q0s16); 175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q15s32 = MULTIPLY_BY_Q0(d22s16, d18s16, d19s16, d23s16, 176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24s16, d26s16, d27s16, d25s16, q0s16); 177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(psrc + 60 + src_stride * 3); 179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d2u16 = vqrshrun_n_s32(q1s32, 7); 181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d3u16 = vqrshrun_n_s32(q2s32, 7); 182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d4u16 = vqrshrun_n_s32(q14s32, 7); 183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d5u16 = vqrshrun_n_s32(q15s32, 7); 184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q1u16 = vcombine_u16(d2u16, d3u16); 186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q2u16 = vcombine_u16(d4u16, d5u16); 187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d2u8 = vqmovn_u16(q1u16); 189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d3u8 = vqmovn_u16(q2u16); 190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0x2u16 = vtrn_u16(vreinterpret_u16_u8(d2u8), 192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u16_u8(d3u8)); 193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0x2u32 = vtrn_u32(vreinterpret_u32_u16(d0x2u16.val[0]), 194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u32_u16(d0x2u16.val[1])); 195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d0x2u8 = vtrn_u8(vreinterpret_u8_u32(d0x2u32.val[0]), 196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vreinterpret_u8_u32(d0x2u32.val[1])); 197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d2u32 = vreinterpret_u32_u8(d0x2u8.val[0]); 199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d3u32 = vreinterpret_u32_u8(d0x2u8.val[1]); 200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d = pdst; 202da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d2u32, 0); 203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d3u32, 0); 205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d2u32, 1); 207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d3u32, 1); 209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q8u16 = q9u16; 211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d20s16 = d23s16; 212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q11u16 = q12u16; 213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q9u16 = q13u16; 214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); 215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian return; 218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve8_vert_neon( 221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8_t *src, 222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ptrdiff_t src_stride, 223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, 224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ptrdiff_t dst_stride, 225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int16_t *filter_x, // unused 226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int x_step_q4, // unused 227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int16_t *filter_y, 228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int y_step_q4, 229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int w, 230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int h) { 231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int height; 232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8_t *s; 233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *d; 234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32x2_t d2u32, d3u32; 235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32x2_t d16u32, d18u32, d20u32, d22u32, d24u32, d26u32; 236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16; 237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x4_t d24s16, d25s16, d26s16, d27s16; 238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint16x4_t d2u16, d3u16, d4u16, d5u16; 239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16x8_t q0s16; 240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; 241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32x4_t q1s32, q2s32, q14s32, q15s32; 242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(y_step_q4 == 16); 244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= src_stride * 3; 246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q0s16 = vld1q_s16(filter_y); 247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (; w > 0; w -= 4, src += 4, dst += 4) { // loop_vert_h 248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s = src; 249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 0); 250da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d16u32 = vld1_lane_u32((const uint32_t *)s, d16u32, 1); 252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 0); 254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 255da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18u32 = vld1_lane_u32((const uint32_t *)s, d18u32, 1); 256da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 0); 258da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 259da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d20u32 = vld1_lane_u32((const uint32_t *)s, d20u32, 1); 260da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 261da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d22u32 = vld1_lane_u32((const uint32_t *)s, d22u32, 0); 262da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 263da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q8u16 = vmovl_u8(vreinterpret_u8_u32(d16u32)); 265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q9u16 = vmovl_u8(vreinterpret_u8_u32(d18u32)); 266da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q10u16 = vmovl_u8(vreinterpret_u8_u32(d20u32)); 267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q11u16 = vmovl_u8(vreinterpret_u8_u32(d22u32)); 268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 269da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18s16 = vreinterpret_s16_u16(vget_low_u16(q9u16)); 270da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d19s16 = vreinterpret_s16_u16(vget_high_u16(q9u16)); 271da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); 272da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d = dst; 273da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (height = h; height > 0; height -= 4) { // loop_vert 274da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 0); 275da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 276da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 0); 277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d26u32 = vld1_lane_u32((const uint32_t *)s, d26u32, 1); 279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24u32 = vld1_lane_u32((const uint32_t *)s, d24u32, 1); 281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian s += src_stride; 282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q12u16 = vmovl_u8(vreinterpret_u8_u32(d24u32)); 284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q13u16 = vmovl_u8(vreinterpret_u8_u32(d26u32)); 285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d16s16 = vreinterpret_s16_u16(vget_low_u16(q8u16)); 287da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d17s16 = vreinterpret_s16_u16(vget_high_u16(q8u16)); 288da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d20s16 = vreinterpret_s16_u16(vget_low_u16(q10u16)); 289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d21s16 = vreinterpret_s16_u16(vget_high_u16(q10u16)); 290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); 291da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); 292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); 293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); 294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 295da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(d); 296da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(d + dst_stride); 297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q1s32 = MULTIPLY_BY_Q0(d16s16, d17s16, d18s16, d19s16, 298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d20s16, d21s16, d22s16, d24s16, q0s16); 299da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(d + dst_stride * 2); 300da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(d + dst_stride * 3); 301da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q2s32 = MULTIPLY_BY_Q0(d17s16, d18s16, d19s16, d20s16, 302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d21s16, d22s16, d24s16, d26s16, q0s16); 303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(s); 304da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(s + src_stride); 305da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q14s32 = MULTIPLY_BY_Q0(d18s16, d19s16, d20s16, d21s16, 306da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d22s16, d24s16, d26s16, d27s16, q0s16); 307da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(s + src_stride * 2); 308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __builtin_prefetch(s + src_stride * 3); 309da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q15s32 = MULTIPLY_BY_Q0(d19s16, d20s16, d21s16, d22s16, 310da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d24s16, d26s16, d27s16, d25s16, q0s16); 311da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d2u16 = vqrshrun_n_s32(q1s32, 7); 313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d3u16 = vqrshrun_n_s32(q2s32, 7); 314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d4u16 = vqrshrun_n_s32(q14s32, 7); 315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d5u16 = vqrshrun_n_s32(q15s32, 7); 316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q1u16 = vcombine_u16(d2u16, d3u16); 318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q2u16 = vcombine_u16(d4u16, d5u16); 319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d2u32 = vreinterpret_u32_u8(vqmovn_u16(q1u16)); 321da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d3u32 = vreinterpret_u32_u8(vqmovn_u16(q2u16)); 322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d2u32, 0); 324da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 325da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d2u32, 1); 326da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 327da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d3u32, 0); 328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 329da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vst1_lane_u32((uint32_t *)d, d3u32, 1); 330da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d += dst_stride; 331da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 332da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q8u16 = q10u16; 333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d18s16 = d22s16; 334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d19s16 = d24s16; 335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian q10u16 = q13u16; 336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian d22s16 = d25s16; 337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian return; 340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 341