1ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian/* 2ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * 4ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian */ 10ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 11ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian#include <arm_neon.h> 127bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include <string.h> 137bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include "./vpx_config.h" 140a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "vpx_dsp/arm/mem_neon.h" 157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx_ports/mem.h" 16ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 17ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianstatic const int8_t vp8_sub_pel_filters[8][8] = { 187bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 0, 0, 128, 0, 0, 0, 0, 0 }, /* note that 1/8 pel positionyys are */ 197bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 0, -6, 123, 12, -1, 0, 0, 0 }, /* just as per alpha -0.5 bicubic */ 207bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 2, -11, 108, 36, -8, 1, 0, 0 }, /* New 1/4 pel 6 tap filter */ 217bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 0, -9, 93, 50, -6, 0, 0, 0 }, 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 3, -16, 77, 77, -16, 3, 0, 0 }, /* New 1/2 pel 6 tap filter */ 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 0, -6, 50, 93, -9, 0, 0, 0 }, 247bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 1, -8, 36, 108, -11, 2, 0, 0 }, /* New 1/4 pel 6 tap filter */ 257bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 0, -1, 12, 123, -6, 0, 0, 0 }, 26ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian}; 27ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 287bc9febe8749e98a3812a0dc4380ceae75c29450Johann// This table is derived from vp8/common/filter.c:vp8_sub_pel_filters. 297bc9febe8749e98a3812a0dc4380ceae75c29450Johann// Apply abs() to all the values. Elements 0, 2, 3, and 5 are always positive. 307bc9febe8749e98a3812a0dc4380ceae75c29450Johann// Elements 1 and 4 are either 0 or negative. The code accounts for this with 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann// multiply/accumulates which either add or subtract as needed. The other 327bc9febe8749e98a3812a0dc4380ceae75c29450Johann// functions will be updated to use this table later. 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann// It is also expanded to 8 elements to allow loading into 64 bit neon 347bc9febe8749e98a3812a0dc4380ceae75c29450Johann// registers. 357bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic const uint8_t abs_filters[8][8] = { 367bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 0, 0, 128, 0, 0, 0, 0, 0 }, { 0, 6, 123, 12, 1, 0, 0, 0 }, 377bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 2, 11, 108, 36, 8, 1, 0, 0 }, { 0, 9, 93, 50, 6, 0, 0, 0 }, 387bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 3, 16, 77, 77, 16, 3, 0, 0 }, { 0, 6, 50, 93, 9, 0, 0, 0 }, 397bc9febe8749e98a3812a0dc4380ceae75c29450Johann { 1, 8, 36, 108, 11, 2, 0, 0 }, { 0, 1, 12, 123, 6, 0, 0, 0 }, 407bc9febe8749e98a3812a0dc4380ceae75c29450Johann}; 41ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 427bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE uint8x8_t load_and_shift(const unsigned char *a) { 437bc9febe8749e98a3812a0dc4380ceae75c29450Johann return vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(vld1_u8(a)), 32)); 447bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 45ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 467bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void filter_add_accumulate(const uint8x16_t a, const uint8x16_t b, 477bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter, uint16x8_t *c, 487bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t *d) { 497bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint32x2x2_t a_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(a)), 507bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u8(vget_high_u8(a))); 517bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint32x2x2_t b_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(b)), 527bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u8(vget_high_u8(b))); 537bc9febe8749e98a3812a0dc4380ceae75c29450Johann *c = vmlal_u8(*c, vreinterpret_u8_u32(a_shuf.val[0]), filter); 547bc9febe8749e98a3812a0dc4380ceae75c29450Johann *d = vmlal_u8(*d, vreinterpret_u8_u32(b_shuf.val[0]), filter); 557bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 56ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 577bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void filter_sub_accumulate(const uint8x16_t a, const uint8x16_t b, 587bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter, uint16x8_t *c, 597bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t *d) { 607bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint32x2x2_t a_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(a)), 617bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u8(vget_high_u8(a))); 627bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint32x2x2_t b_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(b)), 637bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u8(vget_high_u8(b))); 647bc9febe8749e98a3812a0dc4380ceae75c29450Johann *c = vmlsl_u8(*c, vreinterpret_u8_u32(a_shuf.val[0]), filter); 657bc9febe8749e98a3812a0dc4380ceae75c29450Johann *d = vmlsl_u8(*d, vreinterpret_u8_u32(b_shuf.val[0]), filter); 667bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 677bc9febe8749e98a3812a0dc4380ceae75c29450Johann 687bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void yonly4x4(const unsigned char *src, int src_stride, 697bc9febe8749e98a3812a0dc4380ceae75c29450Johann int filter_offset, unsigned char *dst, 707bc9febe8749e98a3812a0dc4380ceae75c29450Johann int dst_stride) { 717bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t a0, a1, a2, a3, a4, a5, a6, a7, a8; 727bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7, b8; 737bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t c0, c1, c2, c3; 747bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t d0, d1; 757bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t e0, e1; 767bc9febe8749e98a3812a0dc4380ceae75c29450Johann 777bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter = vld1_u8(abs_filters[filter_offset]); 787bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter0 = vdup_lane_u8(filter, 0); 797bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter1 = vdup_lane_u8(filter, 1); 807bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter2 = vdup_lane_u8(filter, 2); 817bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter3 = vdup_lane_u8(filter, 3); 827bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter4 = vdup_lane_u8(filter, 4); 837bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t filter5 = vdup_lane_u8(filter, 5); 847bc9febe8749e98a3812a0dc4380ceae75c29450Johann 857bc9febe8749e98a3812a0dc4380ceae75c29450Johann src -= src_stride * 2; 867bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Shift the even rows to allow using 'vext' to combine the vectors. armv8 877bc9febe8749e98a3812a0dc4380ceae75c29450Johann // has vcopy_lane which would be interesting. This started as just a 887bc9febe8749e98a3812a0dc4380ceae75c29450Johann // horrible workaround for clang adding alignment hints to 32bit loads: 897bc9febe8749e98a3812a0dc4380ceae75c29450Johann // https://llvm.org/bugs/show_bug.cgi?id=24421 907bc9febe8749e98a3812a0dc4380ceae75c29450Johann // But it turns out it almost identical to casting the loads. 917bc9febe8749e98a3812a0dc4380ceae75c29450Johann a0 = load_and_shift(src); 927bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 937bc9febe8749e98a3812a0dc4380ceae75c29450Johann a1 = vld1_u8(src); 947bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 957bc9febe8749e98a3812a0dc4380ceae75c29450Johann a2 = load_and_shift(src); 967bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 977bc9febe8749e98a3812a0dc4380ceae75c29450Johann a3 = vld1_u8(src); 987bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 997bc9febe8749e98a3812a0dc4380ceae75c29450Johann a4 = load_and_shift(src); 1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann a5 = vld1_u8(src); 1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 1037bc9febe8749e98a3812a0dc4380ceae75c29450Johann a6 = load_and_shift(src); 1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 1057bc9febe8749e98a3812a0dc4380ceae75c29450Johann a7 = vld1_u8(src); 1067bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_stride; 1077bc9febe8749e98a3812a0dc4380ceae75c29450Johann a8 = vld1_u8(src); 1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Combine the rows so we can operate on 8 at a time. 1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann b0 = vext_u8(a0, a1, 4); 1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann b2 = vext_u8(a2, a3, 4); 1127bc9febe8749e98a3812a0dc4380ceae75c29450Johann b4 = vext_u8(a4, a5, 4); 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann b6 = vext_u8(a6, a7, 4); 1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann b8 = a8; 1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1167bc9febe8749e98a3812a0dc4380ceae75c29450Johann // To keep with the 8-at-a-time theme, combine *alternate* rows. This 1177bc9febe8749e98a3812a0dc4380ceae75c29450Johann // allows combining the odd rows with the even. 1187bc9febe8749e98a3812a0dc4380ceae75c29450Johann b1 = vext_u8(b0, b2, 4); 1197bc9febe8749e98a3812a0dc4380ceae75c29450Johann b3 = vext_u8(b2, b4, 4); 1207bc9febe8749e98a3812a0dc4380ceae75c29450Johann b5 = vext_u8(b4, b6, 4); 1217bc9febe8749e98a3812a0dc4380ceae75c29450Johann b7 = vext_u8(b6, b8, 4); 1227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1237bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Multiply and expand to 16 bits. 1247bc9febe8749e98a3812a0dc4380ceae75c29450Johann c0 = vmull_u8(b0, filter0); 1257bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = vmull_u8(b2, filter0); 1267bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = vmull_u8(b5, filter5); 1277bc9febe8749e98a3812a0dc4380ceae75c29450Johann c3 = vmull_u8(b7, filter5); 1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Multiply, subtract and accumulate for filters 1 and 4 (the negative 1307bc9febe8749e98a3812a0dc4380ceae75c29450Johann // ones). 1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann c0 = vmlsl_u8(c0, b4, filter4); 1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = vmlsl_u8(c1, b6, filter4); 1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = vmlsl_u8(c2, b1, filter1); 1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann c3 = vmlsl_u8(c3, b3, filter1); 1357bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1367bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Add more positive ones. vmlal should really return a signed type. 1377bc9febe8749e98a3812a0dc4380ceae75c29450Johann // It's doing signed math internally, as evidenced by the fact we can do 1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann // subtractions followed by more additions. Ideally we could use 1397bc9febe8749e98a3812a0dc4380ceae75c29450Johann // vqmlal/sl but that instruction doesn't exist. Might be able to 1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann // shoehorn vqdmlal/vqdmlsl in here but it would take some effort. 1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann c0 = vmlal_u8(c0, b2, filter2); 1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = vmlal_u8(c1, b4, filter2); 1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = vmlal_u8(c2, b3, filter3); 1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann c3 = vmlal_u8(c3, b5, filter3); 1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Use signed saturation math because vmlsl may have left some negative 1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann // numbers in there. 1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0 = vqaddq_s16(vreinterpretq_s16_u16(c2), vreinterpretq_s16_u16(c0)); 1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1 = vqaddq_s16(vreinterpretq_s16_u16(c3), vreinterpretq_s16_u16(c1)); 1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Use signed again because numbers like -200 need to be saturated to 0. 1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann e0 = vqrshrun_n_s16(d0, 7); 1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann e1 = vqrshrun_n_s16(d1, 7); 1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1550a39d0a697ff3603e8c100300fda363658e10b23James Zern store_unaligned_u8q(dst, dst_stride, vcombine_u8(e0, e1)); 1567bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 157ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1587bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict4x4_neon(unsigned char *src_ptr, int src_pixels_per_line, 1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann int xoffset, int yoffset, 1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *dst_ptr, int dst_pitch) { 1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t s0, s1, s2, s3, s4; 1627bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint64x2_t s01, s23; 1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Variables to hold src[] elements for the given filter[] 1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t s0_f5, s1_f5, s2_f5, s3_f5, s4_f5; 1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t s4_f1, s4_f2, s4_f3, s4_f4; 1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t s01_f0, s23_f0; 1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint64x2_t s01_f3, s23_f3; 1687bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32x2x2_t s01_f3_q, s23_f3_q, s01_f5_q, s23_f5_q; 1697bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Accumulator variables. 1707bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t d0123, d4567, d89; 1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t d0123_a, d4567_a, d89_a; 1727bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t e0123, e4567, e89; 1737bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Second pass intermediates. 1747bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7, b8; 1757bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t c0, c1, c2, c3; 1767bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t d0, d1; 1777bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t e0, e1; 1787bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t filter, filter0, filter1, filter2, filter3, filter4, filter5; 1797bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1807bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (xoffset == 0) { // Second pass only. 1817bc9febe8749e98a3812a0dc4380ceae75c29450Johann yonly4x4(src_ptr, src_pixels_per_line, yoffset, dst_ptr, dst_pitch); 1827bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 1837bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1847bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1857bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) { // First pass only. 1867bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr -= 2; 1877bc9febe8749e98a3812a0dc4380ceae75c29450Johann } else { // Add context for the second pass. 2 extra lines on top. 1887bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr -= 2 + (src_pixels_per_line * 2); 1897bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1907bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1917bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter = vld1_u8(abs_filters[xoffset]); 1927bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter0 = vdup_lane_u8(filter, 0); 1937bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter1 = vdup_lane_u8(filter, 1); 1947bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter2 = vdup_lane_u8(filter, 2); 1957bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter3 = vdup_lane_u8(filter, 3); 1967bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter4 = vdup_lane_u8(filter, 4); 1977bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter5 = vdup_lane_u8(filter, 5); 1987bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1997bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 2 bytes of context, 4 bytes of src values, 3 bytes of context, 7 bytes of 2007bc9febe8749e98a3812a0dc4380ceae75c29450Johann // garbage. So much effort for that last single bit. 2017bc9febe8749e98a3812a0dc4380ceae75c29450Johann // The low values of each pair are for filter0. 2027bc9febe8749e98a3812a0dc4380ceae75c29450Johann s0 = vld1q_u8(src_ptr); 2037bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2047bc9febe8749e98a3812a0dc4380ceae75c29450Johann s1 = vld1q_u8(src_ptr); 2057bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2067bc9febe8749e98a3812a0dc4380ceae75c29450Johann s2 = vld1q_u8(src_ptr); 2077bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2087bc9febe8749e98a3812a0dc4380ceae75c29450Johann s3 = vld1q_u8(src_ptr); 2097bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2107bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2117bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Shift to extract values for filter[5] 2127bc9febe8749e98a3812a0dc4380ceae75c29450Johann // If src[] is 0, this puts: 2137bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 3 4 5 6 7 8 9 10 in s0_f5 2147bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Can't use vshr.u64 because it crosses the double word boundary. 2157bc9febe8749e98a3812a0dc4380ceae75c29450Johann s0_f5 = vext_u8(vget_low_u8(s0), vget_high_u8(s0), 5); 2167bc9febe8749e98a3812a0dc4380ceae75c29450Johann s1_f5 = vext_u8(vget_low_u8(s1), vget_high_u8(s1), 5); 2177bc9febe8749e98a3812a0dc4380ceae75c29450Johann s2_f5 = vext_u8(vget_low_u8(s2), vget_high_u8(s2), 5); 2187bc9febe8749e98a3812a0dc4380ceae75c29450Johann s3_f5 = vext_u8(vget_low_u8(s3), vget_high_u8(s3), 5); 2197bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2207bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f0 = vcombine_u8(vget_low_u8(s0), vget_low_u8(s1)); 2217bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f0 = vcombine_u8(vget_low_u8(s2), vget_low_u8(s3)); 2227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2237bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f5_q = vzip_u32(vreinterpret_u32_u8(s0_f5), vreinterpret_u32_u8(s1_f5)); 2247bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f5_q = vzip_u32(vreinterpret_u32_u8(s2_f5), vreinterpret_u32_u8(s3_f5)); 2257bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0123 = vmull_u8(vreinterpret_u8_u32(s01_f5_q.val[0]), filter5); 2267bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4567 = vmull_u8(vreinterpret_u8_u32(s23_f5_q.val[0]), filter5); 2277bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2287bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Keep original src data as 64 bits to simplify shifting and extracting. 2297bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01 = vreinterpretq_u64_u8(s01_f0); 2307bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23 = vreinterpretq_u64_u8(s23_f0); 2317bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2327bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 3 4 5 6 * filter0 2337bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_add_accumulate(s01_f0, s23_f0, filter0, &d0123, &d4567); 2347bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2357bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Shift over one to use -1, 0, 1, 2 for filter1 2367bc9febe8749e98a3812a0dc4380ceae75c29450Johann // -1 0 1 2 * filter1 2377bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 8)), 2387bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpretq_u8_u64(vshrq_n_u64(s23, 8)), filter1, 2397bc9febe8749e98a3812a0dc4380ceae75c29450Johann &d0123, &d4567); 2407bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2417bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 2 3 4 5 * filter4 2427bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 32)), 2437bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpretq_u8_u64(vshrq_n_u64(s23, 32)), filter4, 2447bc9febe8749e98a3812a0dc4380ceae75c29450Johann &d0123, &d4567); 2457bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2467bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 0 1 2 3 * filter2 2477bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_add_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 16)), 2487bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpretq_u8_u64(vshrq_n_u64(s23, 16)), filter2, 2497bc9febe8749e98a3812a0dc4380ceae75c29450Johann &d0123, &d4567); 2507bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2517bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 1 2 3 4 * filter3 2527bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f3 = vshrq_n_u64(s01, 24); 2537bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f3 = vshrq_n_u64(s23, 24); 2547bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s01_f3)), 2557bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u64(vget_high_u64(s01_f3))); 2567bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s23_f3)), 2577bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u64(vget_high_u64(s23_f3))); 2587bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Accumulate into different registers so it can use saturated addition. 2597bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0123_a = vmull_u8(vreinterpret_u8_u32(s01_f3_q.val[0]), filter3); 2607bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4567_a = vmull_u8(vreinterpret_u8_u32(s23_f3_q.val[0]), filter3); 2617bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2627bc9febe8749e98a3812a0dc4380ceae75c29450Johann e0123 = 2637bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqaddq_s16(vreinterpretq_s16_u16(d0123), vreinterpretq_s16_u16(d0123_a)); 2647bc9febe8749e98a3812a0dc4380ceae75c29450Johann e4567 = 2657bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqaddq_s16(vreinterpretq_s16_u16(d4567), vreinterpretq_s16_u16(d4567_a)); 2667bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2677bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Shift and narrow. 2687bc9febe8749e98a3812a0dc4380ceae75c29450Johann b0 = vqrshrun_n_s16(e0123, 7); 2697bc9febe8749e98a3812a0dc4380ceae75c29450Johann b2 = vqrshrun_n_s16(e4567, 7); 2707bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2717bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) { // firstpass_filter4x4_only 2720a39d0a697ff3603e8c100300fda363658e10b23James Zern store_unaligned_u8q(dst_ptr, dst_pitch, vcombine_u8(b0, b2)); 2737bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 2747bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 2757bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2767bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Load additional context when doing both filters. 2777bc9febe8749e98a3812a0dc4380ceae75c29450Johann s0 = vld1q_u8(src_ptr); 2787bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2797bc9febe8749e98a3812a0dc4380ceae75c29450Johann s1 = vld1q_u8(src_ptr); 2807bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2817bc9febe8749e98a3812a0dc4380ceae75c29450Johann s2 = vld1q_u8(src_ptr); 2827bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2837bc9febe8749e98a3812a0dc4380ceae75c29450Johann s3 = vld1q_u8(src_ptr); 2847bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_ptr += src_pixels_per_line; 2857bc9febe8749e98a3812a0dc4380ceae75c29450Johann s4 = vld1q_u8(src_ptr); 2867bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2877bc9febe8749e98a3812a0dc4380ceae75c29450Johann s0_f5 = vext_u8(vget_low_u8(s0), vget_high_u8(s0), 5); 2887bc9febe8749e98a3812a0dc4380ceae75c29450Johann s1_f5 = vext_u8(vget_low_u8(s1), vget_high_u8(s1), 5); 2897bc9febe8749e98a3812a0dc4380ceae75c29450Johann s2_f5 = vext_u8(vget_low_u8(s2), vget_high_u8(s2), 5); 2907bc9febe8749e98a3812a0dc4380ceae75c29450Johann s3_f5 = vext_u8(vget_low_u8(s3), vget_high_u8(s3), 5); 2917bc9febe8749e98a3812a0dc4380ceae75c29450Johann s4_f5 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 5); 2927bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2937bc9febe8749e98a3812a0dc4380ceae75c29450Johann // 3 4 5 6 * filter0 2947bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f0 = vcombine_u8(vget_low_u8(s0), vget_low_u8(s1)); 2957bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f0 = vcombine_u8(vget_low_u8(s2), vget_low_u8(s3)); 2967bc9febe8749e98a3812a0dc4380ceae75c29450Johann 2977bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f5_q = vzip_u32(vreinterpret_u32_u8(s0_f5), vreinterpret_u32_u8(s1_f5)); 2987bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f5_q = vzip_u32(vreinterpret_u32_u8(s2_f5), vreinterpret_u32_u8(s3_f5)); 2997bc9febe8749e98a3812a0dc4380ceae75c29450Johann // But this time instead of 16 pixels to filter, there are 20. So an extra 3007bc9febe8749e98a3812a0dc4380ceae75c29450Johann // run with a doubleword register. 3017bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0123 = vmull_u8(vreinterpret_u8_u32(s01_f5_q.val[0]), filter5); 3027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4567 = vmull_u8(vreinterpret_u8_u32(s23_f5_q.val[0]), filter5); 3037bc9febe8749e98a3812a0dc4380ceae75c29450Johann d89 = vmull_u8(s4_f5, filter5); 3047bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3057bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Save a copy as u64 for shifting. 3067bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01 = vreinterpretq_u64_u8(s01_f0); 3077bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23 = vreinterpretq_u64_u8(s23_f0); 3087bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3097bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_add_accumulate(s01_f0, s23_f0, filter0, &d0123, &d4567); 3107bc9febe8749e98a3812a0dc4380ceae75c29450Johann d89 = vmlal_u8(d89, vget_low_u8(s4), filter0); 3117bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3127bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 8)), 3137bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpretq_u8_u64(vshrq_n_u64(s23, 8)), filter1, 3147bc9febe8749e98a3812a0dc4380ceae75c29450Johann &d0123, &d4567); 3157bc9febe8749e98a3812a0dc4380ceae75c29450Johann s4_f1 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 1); 3167bc9febe8749e98a3812a0dc4380ceae75c29450Johann d89 = vmlsl_u8(d89, s4_f1, filter1); 3177bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3187bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 32)), 3197bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpretq_u8_u64(vshrq_n_u64(s23, 32)), filter4, 3207bc9febe8749e98a3812a0dc4380ceae75c29450Johann &d0123, &d4567); 3217bc9febe8749e98a3812a0dc4380ceae75c29450Johann s4_f4 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 4); 3227bc9febe8749e98a3812a0dc4380ceae75c29450Johann d89 = vmlsl_u8(d89, s4_f4, filter4); 3237bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3247bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_add_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 16)), 3257bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpretq_u8_u64(vshrq_n_u64(s23, 16)), filter2, 3267bc9febe8749e98a3812a0dc4380ceae75c29450Johann &d0123, &d4567); 3277bc9febe8749e98a3812a0dc4380ceae75c29450Johann s4_f2 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 2); 3287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d89 = vmlal_u8(d89, s4_f2, filter2); 3297bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3307bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f3 = vshrq_n_u64(s01, 24); 3317bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f3 = vshrq_n_u64(s23, 24); 3327bc9febe8749e98a3812a0dc4380ceae75c29450Johann s01_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s01_f3)), 3337bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u64(vget_high_u64(s01_f3))); 3347bc9febe8749e98a3812a0dc4380ceae75c29450Johann s23_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s23_f3)), 3357bc9febe8749e98a3812a0dc4380ceae75c29450Johann vreinterpret_u32_u64(vget_high_u64(s23_f3))); 3367bc9febe8749e98a3812a0dc4380ceae75c29450Johann s4_f3 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 3); 3377bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0123_a = vmull_u8(vreinterpret_u8_u32(s01_f3_q.val[0]), filter3); 3387bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4567_a = vmull_u8(vreinterpret_u8_u32(s23_f3_q.val[0]), filter3); 3397bc9febe8749e98a3812a0dc4380ceae75c29450Johann d89_a = vmull_u8(s4_f3, filter3); 3407bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3417bc9febe8749e98a3812a0dc4380ceae75c29450Johann e0123 = 3427bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqaddq_s16(vreinterpretq_s16_u16(d0123), vreinterpretq_s16_u16(d0123_a)); 3437bc9febe8749e98a3812a0dc4380ceae75c29450Johann e4567 = 3447bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqaddq_s16(vreinterpretq_s16_u16(d4567), vreinterpretq_s16_u16(d4567_a)); 3457bc9febe8749e98a3812a0dc4380ceae75c29450Johann e89 = vqaddq_s16(vreinterpretq_s16_u16(d89), vreinterpretq_s16_u16(d89_a)); 3467bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3477bc9febe8749e98a3812a0dc4380ceae75c29450Johann b4 = vqrshrun_n_s16(e0123, 7); 3487bc9febe8749e98a3812a0dc4380ceae75c29450Johann b6 = vqrshrun_n_s16(e4567, 7); 3497bc9febe8749e98a3812a0dc4380ceae75c29450Johann b8 = vqrshrun_n_s16(e89, 7); 3507bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3517bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Second pass: 4x4 3527bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter = vld1_u8(abs_filters[yoffset]); 3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter0 = vdup_lane_u8(filter, 0); 3547bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter1 = vdup_lane_u8(filter, 1); 3557bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter2 = vdup_lane_u8(filter, 2); 3567bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter3 = vdup_lane_u8(filter, 3); 3577bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter4 = vdup_lane_u8(filter, 4); 3587bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter5 = vdup_lane_u8(filter, 5); 3597bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3607bc9febe8749e98a3812a0dc4380ceae75c29450Johann b1 = vext_u8(b0, b2, 4); 3617bc9febe8749e98a3812a0dc4380ceae75c29450Johann b3 = vext_u8(b2, b4, 4); 3627bc9febe8749e98a3812a0dc4380ceae75c29450Johann b5 = vext_u8(b4, b6, 4); 3637bc9febe8749e98a3812a0dc4380ceae75c29450Johann b7 = vext_u8(b6, b8, 4); 3647bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3657bc9febe8749e98a3812a0dc4380ceae75c29450Johann c0 = vmull_u8(b0, filter0); 3667bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = vmull_u8(b2, filter0); 3677bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = vmull_u8(b5, filter5); 3687bc9febe8749e98a3812a0dc4380ceae75c29450Johann c3 = vmull_u8(b7, filter5); 3697bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3707bc9febe8749e98a3812a0dc4380ceae75c29450Johann c0 = vmlsl_u8(c0, b4, filter4); 3717bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = vmlsl_u8(c1, b6, filter4); 3727bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = vmlsl_u8(c2, b1, filter1); 3737bc9febe8749e98a3812a0dc4380ceae75c29450Johann c3 = vmlsl_u8(c3, b3, filter1); 3747bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3757bc9febe8749e98a3812a0dc4380ceae75c29450Johann c0 = vmlal_u8(c0, b2, filter2); 3767bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = vmlal_u8(c1, b4, filter2); 3777bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = vmlal_u8(c2, b3, filter3); 3787bc9febe8749e98a3812a0dc4380ceae75c29450Johann c3 = vmlal_u8(c3, b5, filter3); 3797bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3807bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0 = vqaddq_s16(vreinterpretq_s16_u16(c2), vreinterpretq_s16_u16(c0)); 3817bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1 = vqaddq_s16(vreinterpretq_s16_u16(c3), vreinterpretq_s16_u16(c1)); 3827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3837bc9febe8749e98a3812a0dc4380ceae75c29450Johann e0 = vqrshrun_n_s16(d0, 7); 3847bc9febe8749e98a3812a0dc4380ceae75c29450Johann e1 = vqrshrun_n_s16(d1, 7); 3857bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3860a39d0a697ff3603e8c100300fda363658e10b23James Zern store_unaligned_u8q(dst_ptr, dst_pitch, vcombine_u8(e0, e1)); 3877bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 3887bc9febe8749e98a3812a0dc4380ceae75c29450Johann 3897bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict8x4_neon(unsigned char *src_ptr, int src_pixels_per_line, 3907bc9febe8749e98a3812a0dc4380ceae75c29450Johann int xoffset, int yoffset, 3917bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *dst_ptr, int dst_pitch) { 3927bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *src; 3937bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; 3947bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8; 3957bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8; 3967bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; 3977bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; 3987bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; 3997bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; 4007bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; 4017bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8; 4027bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4037bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (xoffset == 0) { // secondpass_filter8x4_only 4047bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load second_pass filter 4057bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); 406ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d0s8 = vdup_lane_s8(dtmps8, 0); 407ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d1s8 = vdup_lane_s8(dtmps8, 1); 408ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d2s8 = vdup_lane_s8(dtmps8, 2); 409ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d3s8 = vdup_lane_s8(dtmps8, 3); 410ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d4s8 = vdup_lane_s8(dtmps8, 4); 411ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d5s8 = vdup_lane_s8(dtmps8, 5); 412ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 413ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 414ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 415ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 416ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 417ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 418ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 4197bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load src data 4207bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - src_pixels_per_line * 2; 4217bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vld1_u8(src); 422ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4237bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vld1_u8(src); 424ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4257bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vld1_u8(src); 426ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4277bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vld1_u8(src); 428ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4297bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vld1_u8(src); 430ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4317bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vld1_u8(src); 432ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4337bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vld1_u8(src); 434ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4357bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vld1_u8(src); 436ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 4377bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vld1_u8(src); 438ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 439ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmull_u8(d22u8, d0u8); 440ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmull_u8(d23u8, d0u8); 441ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmull_u8(d24u8, d0u8); 442ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmull_u8(d25u8, d0u8); 443ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 444ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); 445ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); 446ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); 447ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); 448ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 449ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); 450ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); 451ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); 452ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); 453ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 454ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlal_u8(q3u16, d24u8, d2u8); 455ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlal_u8(q4u16, d25u8, d2u8); 456ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlal_u8(q5u16, d26u8, d2u8); 457ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlal_u8(q6u16, d27u8, d2u8); 458ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 459ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlal_u8(q3u16, d27u8, d5u8); 460ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlal_u8(q4u16, d28u8, d5u8); 461ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlal_u8(q5u16, d29u8, d5u8); 462ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlal_u8(q6u16, d30u8, d5u8); 463ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 464ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7u16 = vmull_u8(d25u8, d3u8); 465ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8u16 = vmull_u8(d26u8, d3u8); 466ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9u16 = vmull_u8(d27u8, d3u8); 467ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10u16 = vmull_u8(d28u8, d3u8); 468ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 469ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3s16 = vreinterpretq_s16_u16(q3u16); 470ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4s16 = vreinterpretq_s16_u16(q4u16); 471ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5s16 = vreinterpretq_s16_u16(q5u16); 472ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6s16 = vreinterpretq_s16_u16(q6u16); 473ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7s16 = vreinterpretq_s16_u16(q7u16); 474ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8s16 = vreinterpretq_s16_u16(q8u16); 475ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9s16 = vreinterpretq_s16_u16(q9u16); 476ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10s16 = vreinterpretq_s16_u16(q10u16); 477ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 478ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7s16 = vqaddq_s16(q7s16, q3s16); 479ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8s16 = vqaddq_s16(q8s16, q4s16); 480ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9s16 = vqaddq_s16(q9s16, q5s16); 481ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10s16 = vqaddq_s16(q10s16, q6s16); 482ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 483ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d6u8 = vqrshrun_n_s16(q7s16, 7); 484ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d7u8 = vqrshrun_n_s16(q8s16, 7); 485ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d8u8 = vqrshrun_n_s16(q9s16, 7); 486ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d9u8 = vqrshrun_n_s16(q10s16, 7); 487ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 488ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1_u8(dst_ptr, d6u8); 489ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian dst_ptr += dst_pitch; 490ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1_u8(dst_ptr, d7u8); 491ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian dst_ptr += dst_pitch; 492ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1_u8(dst_ptr, d8u8); 493ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian dst_ptr += dst_pitch; 494ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian vst1_u8(dst_ptr, d9u8); 495ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian return; 4967bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 4977bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4987bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load first_pass filter 4997bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); 5007bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0s8 = vdup_lane_s8(dtmps8, 0); 5017bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1s8 = vdup_lane_s8(dtmps8, 1); 5027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2s8 = vdup_lane_s8(dtmps8, 2); 5037bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3s8 = vdup_lane_s8(dtmps8, 3); 5047bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4s8 = vdup_lane_s8(dtmps8, 4); 5057bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5s8 = vdup_lane_s8(dtmps8, 5); 5067bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 5077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 5087bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 5097bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 5107bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 5117bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 5127bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5137bc9febe8749e98a3812a0dc4380ceae75c29450Johann // First pass: output_height lines x output_width columns (9x4) 5147bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) // firstpass_filter4x4_only 5157bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - 2; 5167bc9febe8749e98a3812a0dc4380ceae75c29450Johann else 5177bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - 2 - (src_pixels_per_line * 2); 5187bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u8 = vld1q_u8(src); 5197bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 5207bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u8 = vld1q_u8(src); 5217bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 5227bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u8 = vld1q_u8(src); 5237bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 5247bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u8 = vld1q_u8(src); 5257bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5267bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); 5277bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); 5287bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); 5297bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); 5307bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5317bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); 5327bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); 5337bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); 5347bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); 5357bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5367bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); 5377bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); 5387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); 5397bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); 5407bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5417bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); 5427bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); 5437bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); 5447bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); 5457bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5467bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); 5477bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); 5487bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); 5497bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); 5507bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5517bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); 5527bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); 5537bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); 5547bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); 5557bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5567bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlal_u8(q7u16, d28u8, d2u8); 5577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d29u8, d2u8); 5587bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d30u8, d2u8); 5597bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d31u8, d2u8); 5607bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5617bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); 5627bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); 5637bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); 5647bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); 5657bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5667bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlal_u8(q7u16, d28u8, d5u8); 5677bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d29u8, d5u8); 5687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d30u8, d5u8); 5697bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d31u8, d5u8); 5707bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5717bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); 5727bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); 5737bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); 5747bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); 5757bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5767bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d28u8, d3u8); 5777bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d29u8, d3u8); 5787bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d30u8, d3u8); 5797bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d31u8, d3u8); 5807bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5817bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 5827bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 5837bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 5847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 5857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 5867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 5877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 5887bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 5897bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5907bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q3s16); 5917bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 5927bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q5s16); 5937bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q6s16); 5947bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5957bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vqrshrun_n_s16(q7s16, 7); 5967bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vqrshrun_n_s16(q8s16, 7); 5977bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vqrshrun_n_s16(q9s16, 7); 5987bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vqrshrun_n_s16(q10s16, 7); 5997bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6007bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) { // firstpass_filter8x4_only 6017bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d22u8); 6027bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 6037bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d23u8); 6047bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 6057bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d24u8); 6067bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 6077bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d25u8); 6087bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 6097bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 6107bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6117bc9febe8749e98a3812a0dc4380ceae75c29450Johann // First Pass on rest 5-line data 6127bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 6137bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u8 = vld1q_u8(src); 6147bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 6157bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u8 = vld1q_u8(src); 6167bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 6177bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u8 = vld1q_u8(src); 6187bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 6197bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u8 = vld1q_u8(src); 6207bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 6217bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u8 = vld1q_u8(src); 6227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6237bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); 6247bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); 6257bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); 6267bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); 6277bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); 6287bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6297bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); 6307bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); 6317bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); 6327bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); 6337bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); 6347bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6357bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); 6367bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); 6377bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); 6387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); 6397bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); 6407bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6417bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); 6427bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); 6437bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); 6447bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); 6457bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); 6467bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6477bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); 6487bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); 6497bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); 6507bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); 6517bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); 6527bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6537bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); 6547bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); 6557bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); 6567bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); 6577bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); 6587bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6597bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d27u8, d2u8); 6607bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d28u8, d2u8); 6617bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d29u8, d2u8); 6627bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlal_u8(q11u16, d30u8, d2u8); 6637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlal_u8(q12u16, d31u8, d2u8); 6647bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6657bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); 6667bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); 6677bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); 6687bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); 6697bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); 6707bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6717bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d27u8, d5u8); 6727bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d28u8, d5u8); 6737bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d29u8, d5u8); 6747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlal_u8(q11u16, d30u8, d5u8); 6757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlal_u8(q12u16, d31u8, d5u8); 6767bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6777bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); 6787bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); 6797bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); 6807bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); 6817bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); 6827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6837bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d27u8, d3u8); 6847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d28u8, d3u8); 6857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d29u8, d3u8); 6867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d30u8, d3u8); 6877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d31u8, d3u8); 6887bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6897bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 6907bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 6917bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 6927bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 6937bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 6947bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 6957bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 6967bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 6977bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vreinterpretq_s16_u16(q11u16); 6987bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vreinterpretq_s16_u16(q12u16); 6997bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7007bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q3s16); 7017bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q4s16); 7027bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q5s16); 7037bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vqaddq_s16(q11s16, q6s16); 7047bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vqaddq_s16(q12s16, q7s16); 7057bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7067bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vqrshrun_n_s16(q8s16, 7); 7077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vqrshrun_n_s16(q9s16, 7); 7087bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vqrshrun_n_s16(q10s16, 7); 7097bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vqrshrun_n_s16(q11s16, 7); 7107bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vqrshrun_n_s16(q12s16, 7); 7117bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7127bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Second pass: 8x4 7137bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); 7147bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0s8 = vdup_lane_s8(dtmps8, 0); 7157bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1s8 = vdup_lane_s8(dtmps8, 1); 7167bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2s8 = vdup_lane_s8(dtmps8, 2); 7177bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3s8 = vdup_lane_s8(dtmps8, 3); 7187bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4s8 = vdup_lane_s8(dtmps8, 4); 7197bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5s8 = vdup_lane_s8(dtmps8, 5); 7207bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 7217bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 7227bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 7237bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 7247bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 7257bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 7267bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7277bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d22u8, d0u8); 7287bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d23u8, d0u8); 7297bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d24u8, d0u8); 7307bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d25u8, d0u8); 7317bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7327bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); 7337bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); 7347bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); 7357bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); 7367bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7377bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); 7387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); 7397bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); 7407bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); 7417bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7427bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d24u8, d2u8); 7437bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d25u8, d2u8); 7447bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d26u8, d2u8); 7457bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d27u8, d2u8); 7467bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7477bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d27u8, d5u8); 7487bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d28u8, d5u8); 7497bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d29u8, d5u8); 7507bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d30u8, d5u8); 7517bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7527bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d25u8, d3u8); 7537bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(d26u8, d3u8); 7547bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(d27u8, d3u8); 7557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(d28u8, d3u8); 7567bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 7587bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 7597bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 7607bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 7617bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 7627bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 7637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 7647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 7657bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7667bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q3s16); 7677bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 7687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q5s16); 7697bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q6s16); 7707bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7717bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vqrshrun_n_s16(q7s16, 7); 7727bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vqrshrun_n_s16(q8s16, 7); 7737bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vqrshrun_n_s16(q9s16, 7); 7747bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vqrshrun_n_s16(q10s16, 7); 7757bc9febe8749e98a3812a0dc4380ceae75c29450Johann 7767bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d6u8); 7777bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 7787bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d7u8); 7797bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 7807bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d8u8); 7817bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 7827bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d9u8); 7837bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 784ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 785ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 7867bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict8x8_neon(unsigned char *src_ptr, int src_pixels_per_line, 7877bc9febe8749e98a3812a0dc4380ceae75c29450Johann int xoffset, int yoffset, 7887bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *dst_ptr, int dst_pitch) { 7897bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *src, *tmpp; 7907bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char tmp[64]; 7917bc9febe8749e98a3812a0dc4380ceae75c29450Johann int i; 7927bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; 7937bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8; 7947bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; 7957bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; 7967bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; 7977bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; 7987bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; 7997bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; 8007bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8; 8017bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8027bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (xoffset == 0) { // secondpass_filter8x8_only 8037bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load second_pass filter 8047bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); 805ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d0s8 = vdup_lane_s8(dtmps8, 0); 806ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d1s8 = vdup_lane_s8(dtmps8, 1); 807ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d2s8 = vdup_lane_s8(dtmps8, 2); 808ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d3s8 = vdup_lane_s8(dtmps8, 3); 809ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d4s8 = vdup_lane_s8(dtmps8, 4); 810ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d5s8 = vdup_lane_s8(dtmps8, 5); 811ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 812ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 813ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 814ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 815ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 816ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 817ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 8187bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load src data 8197bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - src_pixels_per_line * 2; 8207bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = vld1_u8(src); 8217bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8227bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = vld1_u8(src); 8237bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8247bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = vld1_u8(src); 8257bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8267bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = vld1_u8(src); 8277bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vld1_u8(src); 8297bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8307bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vld1_u8(src); 8317bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8327bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vld1_u8(src); 8337bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8347bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vld1_u8(src); 8357bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8367bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vld1_u8(src); 8377bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8387bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vld1_u8(src); 8397bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8407bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vld1_u8(src); 8417bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8427bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vld1_u8(src); 8437bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 8447bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vld1_u8(src); 845ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 846ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (i = 2; i > 0; i--) { 8477bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d18u8, d0u8); 8487bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d19u8, d0u8); 8497bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d20u8, d0u8); 8507bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d21u8, d0u8); 8517bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8527bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); 8537bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); 8547bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); 8557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); 8567bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); 8587bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); 8597bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); 8607bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); 8617bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8627bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d20u8, d2u8); 8637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d21u8, d2u8); 8647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d22u8, d2u8); 8657bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d23u8, d2u8); 8667bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8677bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d23u8, d5u8); 8687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d24u8, d5u8); 8697bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d25u8, d5u8); 8707bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d26u8, d5u8); 8717bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8727bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d21u8, d3u8); 8737bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(d22u8, d3u8); 8747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(d23u8, d3u8); 8757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(d24u8, d3u8); 8767bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8777bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 8787bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 8797bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 8807bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 8817bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 8827bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 8837bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 8847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 8857bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q3s16); 8877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 8887bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q5s16); 8897bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q6s16); 8907bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8917bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vqrshrun_n_s16(q7s16, 7); 8927bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vqrshrun_n_s16(q8s16, 7); 8937bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vqrshrun_n_s16(q9s16, 7); 8947bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vqrshrun_n_s16(q10s16, 7); 8957bc9febe8749e98a3812a0dc4380ceae75c29450Johann 8967bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = d22u8; 8977bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = d23u8; 8987bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = d24u8; 8997bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = d25u8; 9007bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = d26u8; 9017bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = d27u8; 9027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = d28u8; 9037bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = d29u8; 9047bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = d30u8; 9057bc9febe8749e98a3812a0dc4380ceae75c29450Johann 9067bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d6u8); 9077bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 9087bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d7u8); 9097bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 9107bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d8u8); 9117bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 9127bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d9u8); 9137bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 914ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 9157bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 9167bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 9177bc9febe8749e98a3812a0dc4380ceae75c29450Johann 9187bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load first_pass filter 9197bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); 9207bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0s8 = vdup_lane_s8(dtmps8, 0); 9217bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1s8 = vdup_lane_s8(dtmps8, 1); 9227bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2s8 = vdup_lane_s8(dtmps8, 2); 9237bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3s8 = vdup_lane_s8(dtmps8, 3); 9247bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4s8 = vdup_lane_s8(dtmps8, 4); 9257bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5s8 = vdup_lane_s8(dtmps8, 5); 9267bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 9277bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 9287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 9297bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 9307bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 9317bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 9327bc9febe8749e98a3812a0dc4380ceae75c29450Johann 9337bc9febe8749e98a3812a0dc4380ceae75c29450Johann // First pass: output_height lines x output_width columns (9x4) 9347bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) // firstpass_filter4x4_only 9357bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - 2; 9367bc9febe8749e98a3812a0dc4380ceae75c29450Johann else 9377bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - 2 - (src_pixels_per_line * 2); 9387bc9febe8749e98a3812a0dc4380ceae75c29450Johann 9397bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp = tmp; 9407bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 2; i > 0; i--) { 941ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u8 = vld1q_u8(src); 942ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 943ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u8 = vld1q_u8(src); 944ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 945ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u8 = vld1q_u8(src); 946ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 947ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u8 = vld1q_u8(src); 948ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian src += src_pixels_per_line; 949ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9507bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src); 9517bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src + src_pixels_per_line); 9527bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src + src_pixels_per_line * 2); 953ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9547bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); 9557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); 9567bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); 9577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); 958ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9597bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); 9607bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); 9617bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); 9627bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); 963ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); 9657bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); 9667bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); 9677bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); 968ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9697bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); 9707bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); 9717bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); 9727bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); 973ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); 9757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); 9767bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); 9777bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); 978ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9797bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); 9807bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); 9817bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); 9827bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); 983ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlal_u8(q7u16, d28u8, d2u8); 9857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d29u8, d2u8); 9867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d30u8, d2u8); 9877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d31u8, d2u8); 988ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9897bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); 9907bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); 9917bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); 9927bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); 993ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9947bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlal_u8(q7u16, d28u8, d5u8); 9957bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d29u8, d5u8); 9967bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d30u8, d5u8); 9977bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d31u8, d5u8); 998ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 9997bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); 10007bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); 10017bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); 10027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); 10037bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10047bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d28u8, d3u8); 10057bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d29u8, d3u8); 10067bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d30u8, d3u8); 10077bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d31u8, d3u8); 1008ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1009ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3s16 = vreinterpretq_s16_u16(q3u16); 1010ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4s16 = vreinterpretq_s16_u16(q4u16); 1011ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5s16 = vreinterpretq_s16_u16(q5u16); 1012ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6s16 = vreinterpretq_s16_u16(q6u16); 1013ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7s16 = vreinterpretq_s16_u16(q7u16); 1014ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8s16 = vreinterpretq_s16_u16(q8u16); 1015ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9s16 = vreinterpretq_s16_u16(q9u16); 1016ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10s16 = vreinterpretq_s16_u16(q10u16); 1017ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 10187bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q3s16); 10197bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 10207bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q5s16); 10217bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q6s16); 10227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10237bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vqrshrun_n_s16(q7s16, 7); 10247bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vqrshrun_n_s16(q8s16, 7); 10257bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vqrshrun_n_s16(q9s16, 7); 10267bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vqrshrun_n_s16(q10s16, 7); 10277bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10287bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) { // firstpass_filter8x4_only 10297bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d22u8); 10307bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 10317bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d23u8); 10327bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 10337bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d24u8); 10347bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 10357bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d25u8); 10367bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 10377bc9febe8749e98a3812a0dc4380ceae75c29450Johann } else { 10387bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d22u8); 10397bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 10407bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d23u8); 10417bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 10427bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d24u8); 10437bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 10447bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d25u8); 10457bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 10467bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 10477bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 10487bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) return; 10497bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10507bc9febe8749e98a3812a0dc4380ceae75c29450Johann // First Pass on rest 5-line data 10517bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u8 = vld1q_u8(src); 10527bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 10537bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u8 = vld1q_u8(src); 10547bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 10557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u8 = vld1q_u8(src); 10567bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 10577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u8 = vld1q_u8(src); 10587bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 10597bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u8 = vld1q_u8(src); 10607bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10617bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); 10627bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); 10637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); 10647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); 10657bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); 10667bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10677bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); 10687bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); 10697bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); 10707bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); 10717bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); 10727bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10737bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); 10747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); 10757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); 10767bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); 10777bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); 10787bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10797bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); 10807bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); 10817bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); 10827bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); 10837bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); 10847bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); 10867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); 10877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); 10887bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); 10897bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); 10907bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10917bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); 10927bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); 10937bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); 10947bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); 10957bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); 10967bc9febe8749e98a3812a0dc4380ceae75c29450Johann 10977bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d27u8, d2u8); 10987bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d28u8, d2u8); 10997bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d29u8, d2u8); 11007bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlal_u8(q11u16, d30u8, d2u8); 11017bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlal_u8(q12u16, d31u8, d2u8); 11027bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11037bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); 11047bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); 11057bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); 11067bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); 11077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); 11087bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11097bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d27u8, d5u8); 11107bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d28u8, d5u8); 11117bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d29u8, d5u8); 11127bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlal_u8(q11u16, d30u8, d5u8); 11137bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlal_u8(q12u16, d31u8, d5u8); 11147bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11157bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); 11167bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); 11177bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); 11187bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); 11197bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); 11207bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11217bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d27u8, d3u8); 11227bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d28u8, d3u8); 11237bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d29u8, d3u8); 11247bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d30u8, d3u8); 11257bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d31u8, d3u8); 11267bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11277bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 11287bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 11297bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 11307bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 11317bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 11327bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 11337bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 11347bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 11357bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vreinterpretq_s16_u16(q11u16); 11367bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vreinterpretq_s16_u16(q12u16); 11377bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q3s16); 11397bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q4s16); 11407bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q5s16); 11417bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vqaddq_s16(q11s16, q6s16); 11427bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vqaddq_s16(q12s16, q7s16); 11437bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11447bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vqrshrun_n_s16(q8s16, 7); 11457bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vqrshrun_n_s16(q9s16, 7); 11467bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vqrshrun_n_s16(q10s16, 7); 11477bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vqrshrun_n_s16(q11s16, 7); 11487bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vqrshrun_n_s16(q12s16, 7); 11497bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11507bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Second pass: 8x8 11517bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); 11527bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0s8 = vdup_lane_s8(dtmps8, 0); 11537bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1s8 = vdup_lane_s8(dtmps8, 1); 11547bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2s8 = vdup_lane_s8(dtmps8, 2); 11557bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3s8 = vdup_lane_s8(dtmps8, 3); 11567bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4s8 = vdup_lane_s8(dtmps8, 4); 11577bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5s8 = vdup_lane_s8(dtmps8, 5); 11587bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 11597bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 11607bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 11617bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 11627bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 11637bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 11647bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11657bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp = tmp; 11667bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u8 = vld1q_u8(tmpp); 11677bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 11687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u8 = vld1q_u8(tmpp); 11697bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 11707bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u8 = vld1q_u8(tmpp); 11717bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 11727bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u8 = vld1q_u8(tmpp); 11737bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11747bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = vget_low_u8(q9u8); 11757bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = vget_high_u8(q9u8); 11767bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = vget_low_u8(q10u8); 11777bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = vget_high_u8(q10u8); 11787bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vget_low_u8(q11u8); 11797bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vget_high_u8(q11u8); 11807bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vget_low_u8(q12u8); 11817bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vget_high_u8(q12u8); 11827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11837bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 2; i > 0; i--) { 11847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d18u8, d0u8); 11857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d19u8, d0u8); 11867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d20u8, d0u8); 11877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d21u8, d0u8); 11887bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11897bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); 11907bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); 11917bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); 11927bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); 11937bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11947bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); 11957bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); 11967bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); 11977bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); 11987bc9febe8749e98a3812a0dc4380ceae75c29450Johann 11997bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d20u8, d2u8); 12007bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d21u8, d2u8); 12017bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d22u8, d2u8); 12027bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d23u8, d2u8); 12037bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12047bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d23u8, d5u8); 12057bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d24u8, d5u8); 12067bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d25u8, d5u8); 12077bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d26u8, d5u8); 12087bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12097bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d21u8, d3u8); 12107bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(d22u8, d3u8); 12117bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(d23u8, d3u8); 12127bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(d24u8, d3u8); 1213ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 12147bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 12157bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 12167bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 12177bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 12187bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 12197bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 12207bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 12217bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 1222ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 12237bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q3s16); 12247bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 12257bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q5s16); 12267bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q6s16); 12277bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vqrshrun_n_s16(q7s16, 7); 12297bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vqrshrun_n_s16(q8s16, 7); 12307bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vqrshrun_n_s16(q9s16, 7); 12317bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vqrshrun_n_s16(q10s16, 7); 12327bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12337bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = d22u8; 12347bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = d23u8; 12357bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = d24u8; 12367bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = d25u8; 12377bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = d26u8; 12387bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = d27u8; 12397bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = d28u8; 12407bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = d29u8; 12417bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = d30u8; 12427bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12437bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d6u8); 12447bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 12457bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d7u8); 12467bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 12477bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d8u8); 12487bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 12497bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst_ptr, d9u8); 12507bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_pitch; 12517bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 12527bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 12537bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 12547bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12557bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_sixtap_predict16x16_neon(unsigned char *src_ptr, 12567bc9febe8749e98a3812a0dc4380ceae75c29450Johann int src_pixels_per_line, int xoffset, 12577bc9febe8749e98a3812a0dc4380ceae75c29450Johann int yoffset, unsigned char *dst_ptr, 12587bc9febe8749e98a3812a0dc4380ceae75c29450Johann int dst_pitch) { 12597bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char *src, *src_tmp, *dst, *tmpp; 12607bc9febe8749e98a3812a0dc4380ceae75c29450Johann unsigned char tmp[336]; 12617bc9febe8749e98a3812a0dc4380ceae75c29450Johann int i, j; 12627bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; 12637bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8; 12647bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8; 12657bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x8_t d28u8, d29u8, d30u8, d31u8; 12667bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; 12677bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8x16_t q3u8, q4u8; 12687bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16; 12697bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint16x8_t q11u16, q12u16, q13u16, q15u16; 12707bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16; 12717bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t q11s16, q12s16, q13s16, q15s16; 12727bc9febe8749e98a3812a0dc4380ceae75c29450Johann 12737bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (xoffset == 0) { // secondpass_filter8x8_only 12747bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load second_pass filter 1275ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); 1276ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d0s8 = vdup_lane_s8(dtmps8, 0); 1277ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d1s8 = vdup_lane_s8(dtmps8, 1); 1278ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d2s8 = vdup_lane_s8(dtmps8, 2); 1279ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d3s8 = vdup_lane_s8(dtmps8, 3); 1280ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d4s8 = vdup_lane_s8(dtmps8, 4); 1281ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d5s8 = vdup_lane_s8(dtmps8, 5); 1282ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 1283ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 1284ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 1285ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 1286ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 1287ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 1288ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 12897bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load src data 12907bc9febe8749e98a3812a0dc4380ceae75c29450Johann src_tmp = src_ptr - src_pixels_per_line * 2; 12917bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 2; ++i) { 12927bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_tmp + i * 8; 12937bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst = dst_ptr + i * 8; 12947bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = vld1_u8(src); 12957bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 12967bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = vld1_u8(src); 12977bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 12987bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = vld1_u8(src); 12997bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 13007bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = vld1_u8(src); 13017bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 13027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vld1_u8(src); 13037bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 13047bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (j = 0; j < 4; ++j) { 13057bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vld1_u8(src); 13067bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 13077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vld1_u8(src); 13087bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 13097bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vld1_u8(src); 13107bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 13117bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vld1_u8(src); 13127bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 1313ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1314ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmull_u8(d18u8, d0u8); 1315ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmull_u8(d19u8, d0u8); 1316ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmull_u8(d20u8, d0u8); 1317ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmull_u8(d21u8, d0u8); 1318ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1319ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); 1320ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); 1321ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); 1322ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); 1323ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1324ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); 1325ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); 1326ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); 1327ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); 1328ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1329ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlal_u8(q3u16, d20u8, d2u8); 1330ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlal_u8(q4u16, d21u8, d2u8); 1331ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlal_u8(q5u16, d22u8, d2u8); 1332ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlal_u8(q6u16, d23u8, d2u8); 1333ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1334ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3u16 = vmlal_u8(q3u16, d23u8, d5u8); 1335ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4u16 = vmlal_u8(q4u16, d24u8, d5u8); 1336ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5u16 = vmlal_u8(q5u16, d25u8, d5u8); 1337ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6u16 = vmlal_u8(q6u16, d26u8, d5u8); 1338ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1339ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7u16 = vmull_u8(d21u8, d3u8); 1340ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8u16 = vmull_u8(d22u8, d3u8); 1341ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9u16 = vmull_u8(d23u8, d3u8); 1342ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10u16 = vmull_u8(d24u8, d3u8); 1343ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1344ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q3s16 = vreinterpretq_s16_u16(q3u16); 1345ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q4s16 = vreinterpretq_s16_u16(q4u16); 1346ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q5s16 = vreinterpretq_s16_u16(q5u16); 1347ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q6s16 = vreinterpretq_s16_u16(q6u16); 1348ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7s16 = vreinterpretq_s16_u16(q7u16); 1349ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8s16 = vreinterpretq_s16_u16(q8u16); 1350ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9s16 = vreinterpretq_s16_u16(q9u16); 1351ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10s16 = vreinterpretq_s16_u16(q10u16); 1352ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1353ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q7s16 = vqaddq_s16(q7s16, q3s16); 1354ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q8s16 = vqaddq_s16(q8s16, q4s16); 1355ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q9s16 = vqaddq_s16(q9s16, q5s16); 1356ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian q10s16 = vqaddq_s16(q10s16, q6s16); 1357ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1358ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d6u8 = vqrshrun_n_s16(q7s16, 7); 1359ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d7u8 = vqrshrun_n_s16(q8s16, 7); 1360ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d8u8 = vqrshrun_n_s16(q9s16, 7); 1361ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d9u8 = vqrshrun_n_s16(q10s16, 7); 1362ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1363ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d18u8 = d22u8; 1364ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d19u8 = d23u8; 1365ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d20u8 = d24u8; 1366ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d21u8 = d25u8; 1367ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian d22u8 = d26u8; 1368ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 13697bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d6u8); 13707bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 13717bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d7u8); 13727bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 13737bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d8u8); 13747bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 13757bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d9u8); 13767bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 13777bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1378ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 13797bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 13807bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 13817bc9febe8749e98a3812a0dc4380ceae75c29450Johann 13827bc9febe8749e98a3812a0dc4380ceae75c29450Johann // load first_pass filter 13837bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); 13847bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0s8 = vdup_lane_s8(dtmps8, 0); 13857bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1s8 = vdup_lane_s8(dtmps8, 1); 13867bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2s8 = vdup_lane_s8(dtmps8, 2); 13877bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3s8 = vdup_lane_s8(dtmps8, 3); 13887bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4s8 = vdup_lane_s8(dtmps8, 4); 13897bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5s8 = vdup_lane_s8(dtmps8, 5); 13907bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 13917bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 13927bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 13937bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 13947bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 13957bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 13967bc9febe8749e98a3812a0dc4380ceae75c29450Johann 13977bc9febe8749e98a3812a0dc4380ceae75c29450Johann // First pass: output_height lines x output_width columns (9x4) 13987bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (yoffset == 0) { // firstpass_filter4x4_only 13997bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - 2; 14007bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst = dst_ptr; 14017bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 8; ++i) { 14027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vld1_u8(src); 14037bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vld1_u8(src + 8); 14047bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vld1_u8(src + 16); 14057bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 14067bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vld1_u8(src); 14077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d10u8 = vld1_u8(src + 8); 14087bc9febe8749e98a3812a0dc4380ceae75c29450Johann d11u8 = vld1_u8(src + 16); 14097bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 14107bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14117bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src); 14127bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src + src_pixels_per_line); 14137bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14147bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d6u8, d0u8); 14157bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d7u8, d0u8); 14167bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(d9u8, d0u8); 14177bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(d10u8, d0u8); 14187bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14197bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = vext_u8(d6u8, d7u8, 1); 14207bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = vext_u8(d9u8, d10u8, 1); 14217bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vext_u8(d7u8, d8u8, 1); 14227bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vext_u8(d10u8, d11u8, 1); 14237bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vext_u8(d6u8, d7u8, 4); 14247bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vext_u8(d9u8, d10u8, 4); 14257bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vext_u8(d7u8, d8u8, 4); 14267bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(d10u8, d11u8, 4); 14277bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d6u8, d7u8, 5); 14287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d9u8, d10u8, 5); 14297bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14307bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d20u8, d1u8); 14317bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d21u8, d1u8); 14327bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlsl_u8(q7u16, d22u8, d1u8); 14337bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d23u8, d1u8); 14347bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d24u8, d4u8); 14357bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d25u8, d4u8); 14367bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlsl_u8(q7u16, d26u8, d4u8); 14377bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d27u8, d4u8); 14387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d28u8, d5u8); 14397bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d29u8, d5u8); 14407bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14417bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = vext_u8(d7u8, d8u8, 5); 14427bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = vext_u8(d10u8, d11u8, 5); 14437bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vext_u8(d6u8, d7u8, 2); 14447bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vext_u8(d9u8, d10u8, 2); 14457bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vext_u8(d7u8, d8u8, 2); 14467bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vext_u8(d10u8, d11u8, 2); 14477bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vext_u8(d6u8, d7u8, 3); 14487bc9febe8749e98a3812a0dc4380ceae75c29450Johann d27u8 = vext_u8(d9u8, d10u8, 3); 14497bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d7u8, d8u8, 3); 14507bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d10u8, d11u8, 3); 14517bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14527bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlal_u8(q7u16, d20u8, d5u8); 14537bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d21u8, d5u8); 14547bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d22u8, d2u8); 14557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d23u8, d2u8); 14567bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmlal_u8(q7u16, d24u8, d2u8); 14577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d25u8, d2u8); 14587bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14597bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(d26u8, d3u8); 14607bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmull_u8(d27u8, d3u8); 14617bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmull_u8(d28u8, d3u8); 14627bc9febe8749e98a3812a0dc4380ceae75c29450Johann q15u16 = vmull_u8(d29u8, d3u8); 14637bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 14657bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 14667bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 14677bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 14687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 14697bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vreinterpretq_s16_u16(q11u16); 14707bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vreinterpretq_s16_u16(q12u16); 14717bc9febe8749e98a3812a0dc4380ceae75c29450Johann q15s16 = vreinterpretq_s16_u16(q15u16); 14727bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14737bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vqaddq_s16(q6s16, q10s16); 14747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q11s16); 14757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q12s16); 14767bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q15s16); 14777bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14787bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vqrshrun_n_s16(q6s16, 7); 14797bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vqrshrun_n_s16(q7s16, 7); 14807bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vqrshrun_n_s16(q8s16, 7); 14817bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vqrshrun_n_s16(q9s16, 7); 14827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14837bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u8 = vcombine_u8(d6u8, d7u8); 14847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u8 = vcombine_u8(d8u8, d9u8); 14857bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1q_u8(dst, q3u8); 14867bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 14877bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1q_u8(dst, q4u8); 14887bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 1489ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 14907bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 14917bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 14927bc9febe8749e98a3812a0dc4380ceae75c29450Johann 14937bc9febe8749e98a3812a0dc4380ceae75c29450Johann src = src_ptr - 2 - src_pixels_per_line * 2; 14947bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp = tmp; 14957bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 7; ++i) { 14967bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vld1_u8(src); 14977bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vld1_u8(src + 8); 14987bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vld1_u8(src + 16); 14997bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 15007bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vld1_u8(src); 15017bc9febe8749e98a3812a0dc4380ceae75c29450Johann d10u8 = vld1_u8(src + 8); 15027bc9febe8749e98a3812a0dc4380ceae75c29450Johann d11u8 = vld1_u8(src + 16); 15037bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 15047bc9febe8749e98a3812a0dc4380ceae75c29450Johann d12u8 = vld1_u8(src); 15057bc9febe8749e98a3812a0dc4380ceae75c29450Johann d13u8 = vld1_u8(src + 8); 15067bc9febe8749e98a3812a0dc4380ceae75c29450Johann d14u8 = vld1_u8(src + 16); 15077bc9febe8749e98a3812a0dc4380ceae75c29450Johann src += src_pixels_per_line; 1508ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 15097bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src); 15107bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src + src_pixels_per_line); 15117bc9febe8749e98a3812a0dc4380ceae75c29450Johann __builtin_prefetch(src + src_pixels_per_line * 2); 15127bc9febe8749e98a3812a0dc4380ceae75c29450Johann 15137bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(d6u8, d0u8); 15147bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(d7u8, d0u8); 15157bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(d9u8, d0u8); 15167bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmull_u8(d10u8, d0u8); 15177bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmull_u8(d12u8, d0u8); 15187bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13u16 = vmull_u8(d13u8, d0u8); 15197bc9febe8749e98a3812a0dc4380ceae75c29450Johann 15207bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d6u8, d7u8, 1); 15217bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d9u8, d10u8, 1); 15227bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d12u8, d13u8, 1); 15237bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d28u8, d1u8); 15247bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); 15257bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlsl_u8(q12u16, d30u8, d1u8); 15267bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d7u8, d8u8, 1); 15277bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d10u8, d11u8, 1); 15287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d13u8, d14u8, 1); 15297bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); 15307bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlsl_u8(q11u16, d29u8, d1u8); 15317bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13u16 = vmlsl_u8(q13u16, d30u8, d1u8); 1532ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 15337bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d6u8, d7u8, 4); 15347bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d9u8, d10u8, 4); 15357bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d12u8, d13u8, 4); 15367bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlsl_u8(q8u16, d28u8, d4u8); 15377bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); 15387bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlsl_u8(q12u16, d30u8, d4u8); 15397bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d7u8, d8u8, 4); 15407bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d10u8, d11u8, 4); 15417bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d13u8, d14u8, 4); 15427bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); 15437bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlsl_u8(q11u16, d29u8, d4u8); 15447bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13u16 = vmlsl_u8(q13u16, d30u8, d4u8); 1545ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 15467bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d6u8, d7u8, 5); 15477bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d9u8, d10u8, 5); 15487bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d12u8, d13u8, 5); 15497bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d28u8, d5u8); 15507bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d29u8, d5u8); 15517bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlal_u8(q12u16, d30u8, d5u8); 15527bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d7u8, d8u8, 5); 15537bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d10u8, d11u8, 5); 15547bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d13u8, d14u8, 5); 15557bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d28u8, d5u8); 15567bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlal_u8(q11u16, d29u8, d5u8); 15577bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13u16 = vmlal_u8(q13u16, d30u8, d5u8); 1558ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 15597bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d6u8, d7u8, 2); 15607bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d9u8, d10u8, 2); 15617bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d12u8, d13u8, 2); 15627bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmlal_u8(q8u16, d28u8, d2u8); 15637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmlal_u8(q10u16, d29u8, d2u8); 15647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12u16 = vmlal_u8(q12u16, d30u8, d2u8); 15657bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d7u8, d8u8, 2); 15667bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d10u8, d11u8, 2); 15677bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d13u8, d14u8, 2); 15687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmlal_u8(q9u16, d28u8, d2u8); 15697bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11u16 = vmlal_u8(q11u16, d29u8, d2u8); 15707bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13u16 = vmlal_u8(q13u16, d30u8, d2u8); 15717bc9febe8749e98a3812a0dc4380ceae75c29450Johann 15727bc9febe8749e98a3812a0dc4380ceae75c29450Johann d28u8 = vext_u8(d6u8, d7u8, 3); 15737bc9febe8749e98a3812a0dc4380ceae75c29450Johann d29u8 = vext_u8(d9u8, d10u8, 3); 15747bc9febe8749e98a3812a0dc4380ceae75c29450Johann d30u8 = vext_u8(d12u8, d13u8, 3); 15757bc9febe8749e98a3812a0dc4380ceae75c29450Johann d15u8 = vext_u8(d7u8, d8u8, 3); 15767bc9febe8749e98a3812a0dc4380ceae75c29450Johann d31u8 = vext_u8(d10u8, d11u8, 3); 15777bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vext_u8(d13u8, d14u8, 3); 15787bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d28u8, d3u8); 15797bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d29u8, d3u8); 15807bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d30u8, d3u8); 15817bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 15827bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 15837bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 15847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 15857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 15867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vreinterpretq_s16_u16(q12u16); 15877bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 15887bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q5s16); 15897bc9febe8749e98a3812a0dc4380ceae75c29450Johann q12s16 = vqaddq_s16(q12s16, q6s16); 1590ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 15917bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d15u8, d3u8); 15927bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d31u8, d3u8); 15937bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d6u8, d3u8); 15947bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 15957bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 15967bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 15977bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 15987bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vreinterpretq_s16_u16(q11u16); 15997bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13s16 = vreinterpretq_s16_u16(q13u16); 16007bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q6s16); 16017bc9febe8749e98a3812a0dc4380ceae75c29450Johann q11s16 = vqaddq_s16(q11s16, q7s16); 16027bc9febe8749e98a3812a0dc4380ceae75c29450Johann q13s16 = vqaddq_s16(q13s16, q3s16); 16037bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16047bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vqrshrun_n_s16(q8s16, 7); 16057bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vqrshrun_n_s16(q9s16, 7); 16067bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vqrshrun_n_s16(q10s16, 7); 16077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vqrshrun_n_s16(q11s16, 7); 16087bc9febe8749e98a3812a0dc4380ceae75c29450Johann d10u8 = vqrshrun_n_s16(q12s16, 7); 16097bc9febe8749e98a3812a0dc4380ceae75c29450Johann d11u8 = vqrshrun_n_s16(q13s16, 7); 16107bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16117bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d6u8); 16127bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 16137bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d7u8); 16147bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 16157bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d8u8); 16167bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 16177bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d9u8); 16187bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 16197bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d10u8); 16207bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 16217bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(tmpp, d11u8); 16227bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 8; 16237bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 16247bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16257bc9febe8749e98a3812a0dc4380ceae75c29450Johann // Second pass: 16x16 16267bc9febe8749e98a3812a0dc4380ceae75c29450Johann dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); 16277bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0s8 = vdup_lane_s8(dtmps8, 0); 16287bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1s8 = vdup_lane_s8(dtmps8, 1); 16297bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2s8 = vdup_lane_s8(dtmps8, 2); 16307bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3s8 = vdup_lane_s8(dtmps8, 3); 16317bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4s8 = vdup_lane_s8(dtmps8, 4); 16327bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5s8 = vdup_lane_s8(dtmps8, 5); 16337bc9febe8749e98a3812a0dc4380ceae75c29450Johann d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); 16347bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); 16357bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); 16367bc9febe8749e98a3812a0dc4380ceae75c29450Johann d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); 16377bc9febe8749e98a3812a0dc4380ceae75c29450Johann d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); 16387bc9febe8749e98a3812a0dc4380ceae75c29450Johann d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); 16397bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16407bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 2; ++i) { 16417bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst = dst_ptr + 8 * i; 16427bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp = tmp + 8 * i; 16437bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = vld1_u8(tmpp); 16447bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16457bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = vld1_u8(tmpp); 16467bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16477bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = vld1_u8(tmpp); 16487bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16497bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = vld1_u8(tmpp); 16507bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16517bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = vld1_u8(tmpp); 16527bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16537bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (j = 0; j < 4; ++j) { 16547bc9febe8749e98a3812a0dc4380ceae75c29450Johann d23u8 = vld1_u8(tmpp); 16557bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16567bc9febe8749e98a3812a0dc4380ceae75c29450Johann d24u8 = vld1_u8(tmpp); 16577bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16587bc9febe8749e98a3812a0dc4380ceae75c29450Johann d25u8 = vld1_u8(tmpp); 16597bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16607bc9febe8749e98a3812a0dc4380ceae75c29450Johann d26u8 = vld1_u8(tmpp); 16617bc9febe8749e98a3812a0dc4380ceae75c29450Johann tmpp += 16; 16627bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16637bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmull_u8(d18u8, d0u8); 16647bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmull_u8(d19u8, d0u8); 16657bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmull_u8(d20u8, d0u8); 16667bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmull_u8(d21u8, d0u8); 16677bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16687bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); 16697bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); 16707bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); 16717bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); 16727bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16737bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); 16747bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); 16757bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); 16767bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); 16777bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16787bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d20u8, d2u8); 16797bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d21u8, d2u8); 16807bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d22u8, d2u8); 16817bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d23u8, d2u8); 16827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16837bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3u16 = vmlal_u8(q3u16, d23u8, d5u8); 16847bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4u16 = vmlal_u8(q4u16, d24u8, d5u8); 16857bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5u16 = vmlal_u8(q5u16, d25u8, d5u8); 16867bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6u16 = vmlal_u8(q6u16, d26u8, d5u8); 16877bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16887bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7u16 = vmull_u8(d21u8, d3u8); 16897bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8u16 = vmull_u8(d22u8, d3u8); 16907bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9u16 = vmull_u8(d23u8, d3u8); 16917bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10u16 = vmull_u8(d24u8, d3u8); 16927bc9febe8749e98a3812a0dc4380ceae75c29450Johann 16937bc9febe8749e98a3812a0dc4380ceae75c29450Johann q3s16 = vreinterpretq_s16_u16(q3u16); 16947bc9febe8749e98a3812a0dc4380ceae75c29450Johann q4s16 = vreinterpretq_s16_u16(q4u16); 16957bc9febe8749e98a3812a0dc4380ceae75c29450Johann q5s16 = vreinterpretq_s16_u16(q5u16); 16967bc9febe8749e98a3812a0dc4380ceae75c29450Johann q6s16 = vreinterpretq_s16_u16(q6u16); 16977bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vreinterpretq_s16_u16(q7u16); 16987bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vreinterpretq_s16_u16(q8u16); 16997bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vreinterpretq_s16_u16(q9u16); 17007bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vreinterpretq_s16_u16(q10u16); 17017bc9febe8749e98a3812a0dc4380ceae75c29450Johann 17027bc9febe8749e98a3812a0dc4380ceae75c29450Johann q7s16 = vqaddq_s16(q7s16, q3s16); 17037bc9febe8749e98a3812a0dc4380ceae75c29450Johann q8s16 = vqaddq_s16(q8s16, q4s16); 17047bc9febe8749e98a3812a0dc4380ceae75c29450Johann q9s16 = vqaddq_s16(q9s16, q5s16); 17057bc9febe8749e98a3812a0dc4380ceae75c29450Johann q10s16 = vqaddq_s16(q10s16, q6s16); 17067bc9febe8749e98a3812a0dc4380ceae75c29450Johann 17077bc9febe8749e98a3812a0dc4380ceae75c29450Johann d6u8 = vqrshrun_n_s16(q7s16, 7); 17087bc9febe8749e98a3812a0dc4380ceae75c29450Johann d7u8 = vqrshrun_n_s16(q8s16, 7); 17097bc9febe8749e98a3812a0dc4380ceae75c29450Johann d8u8 = vqrshrun_n_s16(q9s16, 7); 17107bc9febe8749e98a3812a0dc4380ceae75c29450Johann d9u8 = vqrshrun_n_s16(q10s16, 7); 17117bc9febe8749e98a3812a0dc4380ceae75c29450Johann 17127bc9febe8749e98a3812a0dc4380ceae75c29450Johann d18u8 = d22u8; 17137bc9febe8749e98a3812a0dc4380ceae75c29450Johann d19u8 = d23u8; 17147bc9febe8749e98a3812a0dc4380ceae75c29450Johann d20u8 = d24u8; 17157bc9febe8749e98a3812a0dc4380ceae75c29450Johann d21u8 = d25u8; 17167bc9febe8749e98a3812a0dc4380ceae75c29450Johann d22u8 = d26u8; 17177bc9febe8749e98a3812a0dc4380ceae75c29450Johann 17187bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d6u8); 17197bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 17207bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d7u8); 17217bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 17227bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d8u8); 17237bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 17247bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(dst, d9u8); 17257bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst += dst_pitch; 1726ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian } 17277bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 17287bc9febe8749e98a3812a0dc4380ceae75c29450Johann return; 1729ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1730