18b92989c89bec8632aa47dc58dc162f199d62edcJames Zern/* 28b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 38b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * 48b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * Use of this source code is governed by a BSD-style license 58b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * that can be found in the LICENSE file in the root of the source 68b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * tree. An additional intellectual property rights grant can be found 78b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * in the file PATENTS. All contributing project authors may 88b92989c89bec8632aa47dc58dc162f199d62edcJames Zern * be found in the AUTHORS file in the root of the source tree. 98b92989c89bec8632aa47dc58dc162f199d62edcJames Zern */ 108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern#include <arm_neon.h> 128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern#include "./vpx_config.h" 148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern#include "./vpx_dsp_rtcd.h" 158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern#include "vpx_dsp/arm/idct_neon.h" 168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern#include "vpx_dsp/arm/transpose_neon.h" 178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern#include "vpx_dsp/txfm_common.h" 188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 198b92989c89bec8632aa47dc58dc162f199d62edcJames Zernstatic INLINE void load_8x8_s32_dual( 208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern const tran_low_t *input, int32x4x2_t *const in0, int32x4x2_t *const in1, 218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4x2_t *const in2, int32x4x2_t *const in3, int32x4x2_t *const in4, 228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4x2_t *const in5, int32x4x2_t *const in6, int32x4x2_t *const in7) { 
238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in0->val[0] = vld1q_s32(input); 248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in0->val[1] = vld1q_s32(input + 4); 258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in1->val[0] = vld1q_s32(input); 278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in1->val[1] = vld1q_s32(input + 4); 288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in2->val[0] = vld1q_s32(input); 308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in2->val[1] = vld1q_s32(input + 4); 318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in3->val[0] = vld1q_s32(input); 338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in3->val[1] = vld1q_s32(input + 4); 348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in4->val[0] = vld1q_s32(input); 368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in4->val[1] = vld1q_s32(input + 4); 378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in5->val[0] = vld1q_s32(input); 398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in5->val[1] = vld1q_s32(input + 4); 408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in6->val[0] = vld1q_s32(input); 428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in6->val[1] = vld1q_s32(input + 4); 438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in7->val[0] = vld1q_s32(input); 458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern in7->val[1] = vld1q_s32(input + 4); 468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern} 478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 
488b92989c89bec8632aa47dc58dc162f199d62edcJames Zernstatic INLINE void load_4x8_s32_dual(const tran_low_t *input, 498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4_t *const in0, int32x4_t *const in1, 508b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4_t *const in2, int32x4_t *const in3, 518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4_t *const in4, int32x4_t *const in5, 528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4_t *const in6, 538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4_t *const in7) { 548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in0 = vld1q_s32(input); 558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in1 = vld1q_s32(input); 578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in2 = vld1q_s32(input); 598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in3 = vld1q_s32(input); 618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in4 = vld1q_s32(input); 638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in5 = vld1q_s32(input); 658b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in6 = vld1q_s32(input); 678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern input += 32; 688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern *in7 = vld1q_s32(input); 698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern} 708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// Only for the first pass of the _135_ variant. 
// Since it only uses values from
// the top left 16x16 it can safely assume all the remaining values are 0 and
// skip an awful lot of calculations. In fact, only the first 12 columns make
// the cut. None of the elements in the 13th, 14th, 15th or 16th columns are
// used so it skips any calls to input[12|13|14|15] too.
// In C this does a single row of 32 for each call. Here it transposes the top
// left 12x8 to allow using SIMD.

// vp9/common/vp9_scan.c:vp9_default_iscan_32x32 arranges the first 135 non-zero
// coefficients as follows:
//      0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
//  0   0   2   5  10  17  25  38  47  62  83 101 121
//  1   1   4   8  15  22  30  45  58  74  92 112 133
//  2   3   7  12  18  28  36  52  64  82 102 118
//  3   6  11  16  23  31  43  60  73  90 109 126
//  4   9  14  19  29  37  50  65  78  98 116 134
//  5  13  20  26  35  44  54  72  85 105 123
//  6  21  27  33  42  53  63  80  94 113 132
//  7  24  32  39  48  57  71  88 104 120
//  8  34  40  46  56  68  81  96 111 130
//  9  41  49  55  67  77  91 107 124
928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// 10 51 59 66 76 89 99 119 131 938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// 11 61 69 75 87 100 114 129 948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// 12 70 79 86 97 108 122 958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// 13 84 93 103 110 125 968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// 14 98 106 115 127 978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern// 15 117 128 988b92989c89bec8632aa47dc58dc162f199d62edcJames Zernstatic void vpx_highbd_idct32_12_neon(const tran_low_t *const input, 998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32_t *output) { 1008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4x2_t in[12], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32], 1018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[32]; 1028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern load_8x8_s32_dual(input, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], 1048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[6], &in[7]); 1058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], 1068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[7]); 1078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern load_4x8_s32_dual(input + 8, &in[8].val[0], &in[8].val[1], &in[9].val[0], 1098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[9].val[1], &in[10].val[0], &in[10].val[1], 1108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[11].val[0], &in[11].val[1]); 1118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern transpose_s32_4x8(&in[8].val[0], &in[8].val[1], &in[9].val[0], &in[9].val[1], 1128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[10].val[0], &in[10].val[1], &in[11].val[0], 1138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[11].val[1]); 
1148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 1 1168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[16] = multiply_shift_and_narrow_s32_dual(in[1], cospi_31_64); 1178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[31] = multiply_shift_and_narrow_s32_dual(in[1], cospi_1_64); 1188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[18] = multiply_shift_and_narrow_s32_dual(in[9], cospi_23_64); 1208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[29] = multiply_shift_and_narrow_s32_dual(in[9], cospi_9_64); 1218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[19] = multiply_shift_and_narrow_s32_dual(in[7], -cospi_25_64); 1238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[28] = multiply_shift_and_narrow_s32_dual(in[7], cospi_7_64); 1248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[20] = multiply_shift_and_narrow_s32_dual(in[5], cospi_27_64); 1268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[27] = multiply_shift_and_narrow_s32_dual(in[5], cospi_5_64); 1278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[21] = multiply_shift_and_narrow_s32_dual(in[11], -cospi_21_64); 1298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[26] = multiply_shift_and_narrow_s32_dual(in[11], cospi_11_64); 1308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[23] = multiply_shift_and_narrow_s32_dual(in[3], -cospi_29_64); 1328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[24] = multiply_shift_and_narrow_s32_dual(in[3], cospi_3_64); 1338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 2 1358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[8] = 
multiply_shift_and_narrow_s32_dual(in[2], cospi_30_64); 1368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[15] = multiply_shift_and_narrow_s32_dual(in[2], cospi_2_64); 1378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[10] = multiply_shift_and_narrow_s32_dual(in[10], cospi_22_64); 1398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[13] = multiply_shift_and_narrow_s32_dual(in[10], cospi_10_64); 1408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[11] = multiply_shift_and_narrow_s32_dual(in[6], -cospi_26_64); 1428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[12] = multiply_shift_and_narrow_s32_dual(in[6], cospi_6_64); 1438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[18] = highbd_idct_sub_dual(s1[19], s1[18]); 1458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[19] = highbd_idct_add_dual(s1[18], s1[19]); 1468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[20] = highbd_idct_add_dual(s1[20], s1[21]); 1478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[21] = highbd_idct_sub_dual(s1[20], s1[21]); 1488b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[26] = highbd_idct_sub_dual(s1[27], s1[26]); 1498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[27] = highbd_idct_add_dual(s1[26], s1[27]); 1508b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[28] = highbd_idct_add_dual(s1[28], s1[29]); 1518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[29] = highbd_idct_sub_dual(s1[28], s1[29]); 1528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 3 1548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[4] = multiply_shift_and_narrow_s32_dual(in[4], cospi_28_64); 1558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[7] = multiply_shift_and_narrow_s32_dual(in[4], cospi_4_64); 
1568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[10] = highbd_idct_sub_dual(s2[11], s2[10]); 1588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[11] = highbd_idct_add_dual(s2[10], s2[11]); 1598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[12] = highbd_idct_add_dual(s2[12], s2[13]); 1608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[13] = highbd_idct_sub_dual(s2[12], s2[13]); 1618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[17] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], -cospi_4_64, 1638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[31], cospi_28_64); 1648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[30] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], cospi_28_64, 1658b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[31], cospi_4_64); 1668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[18] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_28_64, 1688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[29], -cospi_4_64); 1698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[29] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_4_64, 1708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[29], cospi_28_64); 1718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1728b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[21] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_20_64, 1738b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[26], cospi_12_64); 1748b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[26] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], cospi_12_64, 1758b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[26], cospi_20_64); 1768b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1778b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[22] = 
multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_12_64, 1788b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[24], -cospi_20_64); 1798b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[25] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_20_64, 1808b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[24], cospi_12_64); 1818b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1828b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 4 1838b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[0] = multiply_shift_and_narrow_s32_dual(in[0], cospi_16_64); 1848b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[2] = multiply_shift_and_narrow_s32_dual(in[8], cospi_24_64); 1858b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[3] = multiply_shift_and_narrow_s32_dual(in[8], cospi_8_64); 1868b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1878b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[9] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], -cospi_8_64, 1888b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[15], cospi_24_64); 1898b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[14] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], cospi_24_64, 1908b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[15], cospi_8_64); 1918b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[10] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_24_64, 1938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[13], -cospi_8_64); 1948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[13] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_8_64, 1958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[13], cospi_24_64); 1968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 1978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[16] = highbd_idct_add_dual(s1[16], s2[19]); 1988b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[17] = highbd_idct_add_dual(s3[17], 
s3[18]); 1998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[18] = highbd_idct_sub_dual(s3[17], s3[18]); 2008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[19] = highbd_idct_sub_dual(s1[16], s2[19]); 2018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[20] = highbd_idct_sub_dual(s1[23], s2[20]); 2028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[21] = highbd_idct_sub_dual(s3[22], s3[21]); 2038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[22] = highbd_idct_add_dual(s3[21], s3[22]); 2048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[23] = highbd_idct_add_dual(s2[20], s1[23]); 2058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[24] = highbd_idct_add_dual(s1[24], s2[27]); 2068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[25] = highbd_idct_add_dual(s3[25], s3[26]); 2078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[26] = highbd_idct_sub_dual(s3[25], s3[26]); 2088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[27] = highbd_idct_sub_dual(s1[24], s2[27]); 2098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[28] = highbd_idct_sub_dual(s1[31], s2[28]); 2108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[29] = highbd_idct_sub_dual(s3[30], s3[29]); 2118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[30] = highbd_idct_add_dual(s3[29], s3[30]); 2128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[31] = highbd_idct_add_dual(s2[28], s1[31]); 2138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 5 2158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[0] = highbd_idct_add_dual(s4[0], s4[3]); 2168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[1] = highbd_idct_add_dual(s4[0], s4[2]); 2178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[2] = highbd_idct_sub_dual(s4[0], s4[2]); 2188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[3] = highbd_idct_sub_dual(s4[0], s4[3]); 2198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 
2208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[5] = sub_multiply_shift_and_narrow_s32_dual(s3[7], s3[4], cospi_16_64); 2218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[6] = add_multiply_shift_and_narrow_s32_dual(s3[4], s3[7], cospi_16_64); 2228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[8] = highbd_idct_add_dual(s2[8], s3[11]); 2248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[9] = highbd_idct_add_dual(s4[9], s4[10]); 2258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[10] = highbd_idct_sub_dual(s4[9], s4[10]); 2268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[11] = highbd_idct_sub_dual(s2[8], s3[11]); 2278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[12] = highbd_idct_sub_dual(s2[15], s3[12]); 2288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[13] = highbd_idct_sub_dual(s4[14], s4[13]); 2298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[14] = highbd_idct_add_dual(s4[13], s4[14]); 2308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[15] = highbd_idct_add_dual(s2[15], s3[12]); 2318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[18] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], -cospi_8_64, 2338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[29], cospi_24_64); 2348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[29] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], cospi_24_64, 2358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[29], cospi_8_64); 2368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[19] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], -cospi_8_64, 2388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[28], cospi_24_64); 2398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[28] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], cospi_24_64, 
2408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[28], cospi_8_64); 2418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[20] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_24_64, 2438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[27], -cospi_8_64); 2448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[27] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_8_64, 2458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[27], cospi_24_64); 2468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[21] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_24_64, 2488b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[26], -cospi_8_64); 2498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[26] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_8_64, 2508b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[26], cospi_24_64); 2518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 6 2538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[0] = highbd_idct_add_dual(s5[0], s3[7]); 2548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[1] = highbd_idct_add_dual(s5[1], s5[6]); 2558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[2] = highbd_idct_add_dual(s5[2], s5[5]); 2568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[3] = highbd_idct_add_dual(s5[3], s3[4]); 2578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[4] = highbd_idct_sub_dual(s5[3], s3[4]); 2588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[5] = highbd_idct_sub_dual(s5[2], s5[5]); 2598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[6] = highbd_idct_sub_dual(s5[1], s5[6]); 2608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[7] = highbd_idct_sub_dual(s5[0], s3[7]); 2618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 
2628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[10] = sub_multiply_shift_and_narrow_s32_dual(s5[13], s5[10], cospi_16_64); 2638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[13] = add_multiply_shift_and_narrow_s32_dual(s5[10], s5[13], cospi_16_64); 2648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2658b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[11] = sub_multiply_shift_and_narrow_s32_dual(s5[12], s5[11], cospi_16_64); 2668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[12] = add_multiply_shift_and_narrow_s32_dual(s5[11], s5[12], cospi_16_64); 2678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[16] = highbd_idct_add_dual(s4[16], s4[23]); 2698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[17] = highbd_idct_add_dual(s4[17], s4[22]); 2708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[18] = highbd_idct_add_dual(s5[18], s5[21]); 2718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[19] = highbd_idct_add_dual(s5[19], s5[20]); 2728b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[20] = highbd_idct_sub_dual(s5[19], s5[20]); 2738b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[21] = highbd_idct_sub_dual(s5[18], s5[21]); 2748b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[22] = highbd_idct_sub_dual(s4[17], s4[22]); 2758b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[23] = highbd_idct_sub_dual(s4[16], s4[23]); 2768b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2778b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[24] = highbd_idct_sub_dual(s4[31], s4[24]); 2788b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[25] = highbd_idct_sub_dual(s4[30], s4[25]); 2798b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[26] = highbd_idct_sub_dual(s5[29], s5[26]); 2808b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[27] = highbd_idct_sub_dual(s5[28], s5[27]); 2818b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[28] = highbd_idct_add_dual(s5[27], 
s5[28]); 2828b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[29] = highbd_idct_add_dual(s5[26], s5[29]); 2838b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[30] = highbd_idct_add_dual(s4[25], s4[30]); 2848b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[31] = highbd_idct_add_dual(s4[24], s4[31]); 2858b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 2868b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 7 2878b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[0] = highbd_idct_add_dual(s6[0], s5[15]); 2888b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[1] = highbd_idct_add_dual(s6[1], s5[14]); 2898b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[2] = highbd_idct_add_dual(s6[2], s6[13]); 2908b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[3] = highbd_idct_add_dual(s6[3], s6[12]); 2918b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[4] = highbd_idct_add_dual(s6[4], s6[11]); 2928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[5] = highbd_idct_add_dual(s6[5], s6[10]); 2938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[6] = highbd_idct_add_dual(s6[6], s5[9]); 2948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[7] = highbd_idct_add_dual(s6[7], s5[8]); 2958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[8] = highbd_idct_sub_dual(s6[7], s5[8]); 2968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[9] = highbd_idct_sub_dual(s6[6], s5[9]); 2978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[10] = highbd_idct_sub_dual(s6[5], s6[10]); 2988b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[11] = highbd_idct_sub_dual(s6[4], s6[11]); 2998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[12] = highbd_idct_sub_dual(s6[3], s6[12]); 3008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[13] = highbd_idct_sub_dual(s6[2], s6[13]); 3018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[14] = highbd_idct_sub_dual(s6[1], s5[14]); 3028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[15] = 
highbd_idct_sub_dual(s6[0], s5[15]); 3038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[20] = sub_multiply_shift_and_narrow_s32_dual(s6[27], s6[20], cospi_16_64); 3058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[27] = add_multiply_shift_and_narrow_s32_dual(s6[20], s6[27], cospi_16_64); 3068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[21] = sub_multiply_shift_and_narrow_s32_dual(s6[26], s6[21], cospi_16_64); 3088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[26] = add_multiply_shift_and_narrow_s32_dual(s6[21], s6[26], cospi_16_64); 3098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[22] = sub_multiply_shift_and_narrow_s32_dual(s6[25], s6[22], cospi_16_64); 3118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[25] = add_multiply_shift_and_narrow_s32_dual(s6[22], s6[25], cospi_16_64); 3128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[23] = sub_multiply_shift_and_narrow_s32_dual(s6[24], s6[23], cospi_16_64); 3148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[24] = add_multiply_shift_and_narrow_s32_dual(s6[23], s6[24], cospi_16_64); 3158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // final stage 3178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[0] = highbd_idct_add_dual(s7[0], s6[31]); 3188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[1] = highbd_idct_add_dual(s7[1], s6[30]); 3198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[2] = highbd_idct_add_dual(s7[2], s6[29]); 3208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[3] = highbd_idct_add_dual(s7[3], s6[28]); 3218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[4] = highbd_idct_add_dual(s7[4], s7[27]); 3228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[5] = 
highbd_idct_add_dual(s7[5], s7[26]); 3238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[6] = highbd_idct_add_dual(s7[6], s7[25]); 3248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[7] = highbd_idct_add_dual(s7[7], s7[24]); 3258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[8] = highbd_idct_add_dual(s7[8], s7[23]); 3268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[9] = highbd_idct_add_dual(s7[9], s7[22]); 3278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[10] = highbd_idct_add_dual(s7[10], s7[21]); 3288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[11] = highbd_idct_add_dual(s7[11], s7[20]); 3298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[12] = highbd_idct_add_dual(s7[12], s6[19]); 3308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[13] = highbd_idct_add_dual(s7[13], s6[18]); 3318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[14] = highbd_idct_add_dual(s7[14], s6[17]); 3328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[15] = highbd_idct_add_dual(s7[15], s6[16]); 3338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[16] = highbd_idct_sub_dual(s7[15], s6[16]); 3348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[17] = highbd_idct_sub_dual(s7[14], s6[17]); 3358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[18] = highbd_idct_sub_dual(s7[13], s6[18]); 3368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[19] = highbd_idct_sub_dual(s7[12], s6[19]); 3378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[20] = highbd_idct_sub_dual(s7[11], s7[20]); 3388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[21] = highbd_idct_sub_dual(s7[10], s7[21]); 3398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[22] = highbd_idct_sub_dual(s7[9], s7[22]); 3408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[23] = highbd_idct_sub_dual(s7[8], s7[23]); 3418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[24] = highbd_idct_sub_dual(s7[7], s7[24]); 3428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 
s8[25] = highbd_idct_sub_dual(s7[6], s7[25]); 3438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[26] = highbd_idct_sub_dual(s7[5], s7[26]); 3448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[27] = highbd_idct_sub_dual(s7[4], s7[27]); 3458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[28] = highbd_idct_sub_dual(s7[3], s6[28]); 3468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[29] = highbd_idct_sub_dual(s7[2], s6[29]); 3478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[30] = highbd_idct_sub_dual(s7[1], s6[30]); 3488b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s8[31] = highbd_idct_sub_dual(s7[0], s6[31]); 3498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3508b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[0].val[0]); 3518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[0].val[1]); 3528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[1].val[0]); 3548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[1].val[1]); 3558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[2].val[0]); 3578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[2].val[1]); 3588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[3].val[0]); 3608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[3].val[1]); 3618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[4].val[0]); 3638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[4].val[1]); 3648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3658b92989c89bec8632aa47dc58dc162f199d62edcJames 
Zern vst1q_s32(output + 0, s8[5].val[0]); 3668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[5].val[1]); 3678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[6].val[0]); 3698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[6].val[1]); 3708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[7].val[0]); 3728b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[7].val[1]); 3738b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3748b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 3758b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[8].val[0]); 3768b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[8].val[1]); 3778b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3788b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[9].val[0]); 3798b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[9].val[1]); 3808b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3818b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[10].val[0]); 3828b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[10].val[1]); 3838b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3848b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[11].val[0]); 3858b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[11].val[1]); 3868b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3878b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[12].val[0]); 3888b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[12].val[1]); 3898b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 
output += 16; 3908b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[13].val[0]); 3918b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[13].val[1]); 3928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[14].val[0]); 3948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[14].val[1]); 3958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[15].val[0]); 3978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[15].val[1]); 3988b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 3998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[16].val[0]); 4018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[16].val[1]); 4028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[17].val[0]); 4048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[17].val[1]); 4058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[18].val[0]); 4078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[18].val[1]); 4088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[19].val[0]); 4108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[19].val[1]); 4118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[20].val[0]); 4138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, 
s8[20].val[1]); 4148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[21].val[0]); 4168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[21].val[1]); 4178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[22].val[0]); 4198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[22].val[1]); 4208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[23].val[0]); 4228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[23].val[1]); 4238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[24].val[0]); 4268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[24].val[1]); 4278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[25].val[0]); 4298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[25].val[1]); 4308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[26].val[0]); 4328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[26].val[1]); 4338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[27].val[0]); 4358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[27].val[1]); 4368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[28].val[0]); 
4388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[28].val[1]); 4398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[29].val[0]); 4418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[29].val[1]); 4428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[30].val[0]); 4448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[30].val[1]); 4458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern output += 16; 4468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 0, s8[31].val[0]); 4478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern vst1q_s32(output + 4, s8[31].val[1]); 4488b92989c89bec8632aa47dc58dc162f199d62edcJames Zern} 4498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4508b92989c89bec8632aa47dc58dc162f199d62edcJames Zernstatic void vpx_highbd_idct32_16_neon(const int32_t *const input, 4518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern uint16_t *const output, const int stride, 4528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern const int bd) { 4538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern int32x4x2_t in[16], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32], 4548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[32]; 4558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern load_and_transpose_s32_8x8(input, 16, &in[0], &in[1], &in[2], &in[3], &in[4], 4578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[5], &in[6], &in[7]); 4588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern load_and_transpose_s32_8x8(input + 8, 16, &in[8], &in[9], &in[10], &in[11], 4608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern &in[12], &in[13], &in[14], &in[15]); 
4618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 1 4638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[16] = multiply_shift_and_narrow_s32_dual(in[1], cospi_31_64); 4648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[31] = multiply_shift_and_narrow_s32_dual(in[1], cospi_1_64); 4658b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[17] = multiply_shift_and_narrow_s32_dual(in[15], -cospi_17_64); 4678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[30] = multiply_shift_and_narrow_s32_dual(in[15], cospi_15_64); 4688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[18] = multiply_shift_and_narrow_s32_dual(in[9], cospi_23_64); 4708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[29] = multiply_shift_and_narrow_s32_dual(in[9], cospi_9_64); 4718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4728b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[19] = multiply_shift_and_narrow_s32_dual(in[7], -cospi_25_64); 4738b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[28] = multiply_shift_and_narrow_s32_dual(in[7], cospi_7_64); 4748b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4758b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[20] = multiply_shift_and_narrow_s32_dual(in[5], cospi_27_64); 4768b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[27] = multiply_shift_and_narrow_s32_dual(in[5], cospi_5_64); 4778b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4788b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[21] = multiply_shift_and_narrow_s32_dual(in[11], -cospi_21_64); 4798b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[26] = multiply_shift_and_narrow_s32_dual(in[11], cospi_11_64); 4808b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4818b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[22] = multiply_shift_and_narrow_s32_dual(in[13], cospi_19_64); 
4828b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[25] = multiply_shift_and_narrow_s32_dual(in[13], cospi_13_64); 4838b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4848b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[23] = multiply_shift_and_narrow_s32_dual(in[3], -cospi_29_64); 4858b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s1[24] = multiply_shift_and_narrow_s32_dual(in[3], cospi_3_64); 4868b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4878b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 2 4888b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[8] = multiply_shift_and_narrow_s32_dual(in[2], cospi_30_64); 4898b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[15] = multiply_shift_and_narrow_s32_dual(in[2], cospi_2_64); 4908b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4918b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[9] = multiply_shift_and_narrow_s32_dual(in[14], -cospi_18_64); 4928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[14] = multiply_shift_and_narrow_s32_dual(in[14], cospi_14_64); 4938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[10] = multiply_shift_and_narrow_s32_dual(in[10], cospi_22_64); 4958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[13] = multiply_shift_and_narrow_s32_dual(in[10], cospi_10_64); 4968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 4978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[11] = multiply_shift_and_narrow_s32_dual(in[6], -cospi_26_64); 4988b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[12] = multiply_shift_and_narrow_s32_dual(in[6], cospi_6_64); 4998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[16] = highbd_idct_add_dual(s1[16], s1[17]); 5018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[17] = highbd_idct_sub_dual(s1[16], s1[17]); 5028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[18] = highbd_idct_sub_dual(s1[19], 
s1[18]); 5038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[19] = highbd_idct_add_dual(s1[18], s1[19]); 5048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[20] = highbd_idct_add_dual(s1[20], s1[21]); 5058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[21] = highbd_idct_sub_dual(s1[20], s1[21]); 5068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[22] = highbd_idct_sub_dual(s1[23], s1[22]); 5078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[23] = highbd_idct_add_dual(s1[22], s1[23]); 5088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[24] = highbd_idct_add_dual(s1[24], s1[25]); 5098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[25] = highbd_idct_sub_dual(s1[24], s1[25]); 5108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[26] = highbd_idct_sub_dual(s1[27], s1[26]); 5118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[27] = highbd_idct_add_dual(s1[26], s1[27]); 5128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[28] = highbd_idct_add_dual(s1[28], s1[29]); 5138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[29] = highbd_idct_sub_dual(s1[28], s1[29]); 5148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[30] = highbd_idct_sub_dual(s1[31], s1[30]); 5158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[31] = highbd_idct_add_dual(s1[30], s1[31]); 5168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 3 5188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[4] = multiply_shift_and_narrow_s32_dual(in[4], cospi_28_64); 5198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[7] = multiply_shift_and_narrow_s32_dual(in[4], cospi_4_64); 5208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[5] = multiply_shift_and_narrow_s32_dual(in[12], -cospi_20_64); 5228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[6] = multiply_shift_and_narrow_s32_dual(in[12], cospi_12_64); 
5238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[8] = highbd_idct_add_dual(s2[8], s2[9]); 5258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[9] = highbd_idct_sub_dual(s2[8], s2[9]); 5268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[10] = highbd_idct_sub_dual(s2[11], s2[10]); 5278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[11] = highbd_idct_add_dual(s2[10], s2[11]); 5288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[12] = highbd_idct_add_dual(s2[12], s2[13]); 5298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[13] = highbd_idct_sub_dual(s2[12], s2[13]); 5308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[14] = highbd_idct_sub_dual(s2[15], s2[14]); 5318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[15] = highbd_idct_add_dual(s2[14], s2[15]); 5328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[17] = multiply_accumulate_shift_and_narrow_s32_dual(s2[17], -cospi_4_64, 5348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[30], cospi_28_64); 5358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[30] = multiply_accumulate_shift_and_narrow_s32_dual(s2[17], cospi_28_64, 5368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[30], cospi_4_64); 5378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[18] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_28_64, 5398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[29], -cospi_4_64); 5408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[29] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_4_64, 5418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[29], cospi_28_64); 5428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[21] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_20_64, 
5448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[26], cospi_12_64); 5458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[26] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], cospi_12_64, 5468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[26], cospi_20_64); 5478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5488b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[22] = multiply_accumulate_shift_and_narrow_s32_dual(s2[22], -cospi_12_64, 5498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[25], -cospi_20_64); 5508b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[25] = multiply_accumulate_shift_and_narrow_s32_dual(s2[22], -cospi_20_64, 5518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s2[25], cospi_12_64); 5528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 4 5548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[0] = multiply_shift_and_narrow_s32_dual(in[0], cospi_16_64); 5558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[2] = multiply_shift_and_narrow_s32_dual(in[8], cospi_24_64); 5568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[3] = multiply_shift_and_narrow_s32_dual(in[8], cospi_8_64); 5578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[4] = highbd_idct_add_dual(s3[4], s3[5]); 5598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[5] = highbd_idct_sub_dual(s3[4], s3[5]); 5608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[6] = highbd_idct_sub_dual(s3[7], s3[6]); 5618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[7] = highbd_idct_add_dual(s3[6], s3[7]); 5628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[9] = multiply_accumulate_shift_and_narrow_s32_dual(s3[9], -cospi_8_64, 5648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[14], cospi_24_64); 5658b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[14] = 
multiply_accumulate_shift_and_narrow_s32_dual(s3[9], cospi_24_64, 5668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[14], cospi_8_64); 5678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[10] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_24_64, 5698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[13], -cospi_8_64); 5708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[13] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_8_64, 5718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s3[13], cospi_24_64); 5728b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5738b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[16] = highbd_idct_add_dual(s2[16], s2[19]); 5748b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[17] = highbd_idct_add_dual(s3[17], s3[18]); 5758b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[18] = highbd_idct_sub_dual(s3[17], s3[18]); 5768b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[19] = highbd_idct_sub_dual(s2[16], s2[19]); 5778b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[20] = highbd_idct_sub_dual(s2[23], s2[20]); 5788b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[21] = highbd_idct_sub_dual(s3[22], s3[21]); 5798b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[22] = highbd_idct_add_dual(s3[21], s3[22]); 5808b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[23] = highbd_idct_add_dual(s2[20], s2[23]); 5818b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[24] = highbd_idct_add_dual(s2[24], s2[27]); 5828b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[25] = highbd_idct_add_dual(s3[25], s3[26]); 5838b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[26] = highbd_idct_sub_dual(s3[25], s3[26]); 5848b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[27] = highbd_idct_sub_dual(s2[24], s2[27]); 5858b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[28] = highbd_idct_sub_dual(s2[31], s2[28]); 
5868b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[29] = highbd_idct_sub_dual(s3[30], s3[29]); 5878b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[30] = highbd_idct_add_dual(s3[29], s3[30]); 5888b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[31] = highbd_idct_add_dual(s2[28], s2[31]); 5898b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5908b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 5 5918b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[0] = highbd_idct_add_dual(s4[0], s4[3]); 5928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[1] = highbd_idct_add_dual(s4[0], s4[2]); 5938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[2] = highbd_idct_sub_dual(s4[0], s4[2]); 5948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[3] = highbd_idct_sub_dual(s4[0], s4[3]); 5958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[5] = sub_multiply_shift_and_narrow_s32_dual(s4[6], s4[5], cospi_16_64); 5978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[6] = add_multiply_shift_and_narrow_s32_dual(s4[5], s4[6], cospi_16_64); 5988b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 5998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[8] = highbd_idct_add_dual(s3[8], s3[11]); 6008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[9] = highbd_idct_add_dual(s4[9], s4[10]); 6018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[10] = highbd_idct_sub_dual(s4[9], s4[10]); 6028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[11] = highbd_idct_sub_dual(s3[8], s3[11]); 6038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[12] = highbd_idct_sub_dual(s3[15], s3[12]); 6048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[13] = highbd_idct_sub_dual(s4[14], s4[13]); 6058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[14] = highbd_idct_add_dual(s4[13], s4[14]); 6068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[15] = highbd_idct_add_dual(s3[15], s3[12]); 
6078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[18] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], -cospi_8_64, 6098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[29], cospi_24_64); 6108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[29] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], cospi_24_64, 6118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[29], cospi_8_64); 6128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[19] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], -cospi_8_64, 6148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[28], cospi_24_64); 6158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[28] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], cospi_24_64, 6168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[28], cospi_8_64); 6178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[20] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_24_64, 6198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[27], -cospi_8_64); 6208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[27] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_8_64, 6218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[27], cospi_24_64); 6228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[21] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_24_64, 6248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[26], -cospi_8_64); 6258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s5[26] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_8_64, 6268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s4[26], cospi_24_64); 6278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 6 
6298b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[0] = highbd_idct_add_dual(s5[0], s4[7]); 6308b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[1] = highbd_idct_add_dual(s5[1], s5[6]); 6318b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[2] = highbd_idct_add_dual(s5[2], s5[5]); 6328b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[3] = highbd_idct_add_dual(s5[3], s4[4]); 6338b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[4] = highbd_idct_sub_dual(s5[3], s4[4]); 6348b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[5] = highbd_idct_sub_dual(s5[2], s5[5]); 6358b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[6] = highbd_idct_sub_dual(s5[1], s5[6]); 6368b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[7] = highbd_idct_sub_dual(s5[0], s4[7]); 6378b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6388b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[10] = sub_multiply_shift_and_narrow_s32_dual(s5[13], s5[10], cospi_16_64); 6398b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[13] = add_multiply_shift_and_narrow_s32_dual(s5[10], s5[13], cospi_16_64); 6408b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6418b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[11] = sub_multiply_shift_and_narrow_s32_dual(s5[12], s5[11], cospi_16_64); 6428b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[12] = add_multiply_shift_and_narrow_s32_dual(s5[11], s5[12], cospi_16_64); 6438b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6448b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[16] = highbd_idct_add_dual(s4[16], s4[23]); 6458b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[17] = highbd_idct_add_dual(s4[17], s4[22]); 6468b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[18] = highbd_idct_add_dual(s5[18], s5[21]); 6478b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[19] = highbd_idct_add_dual(s5[19], s5[20]); 6488b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[20] = highbd_idct_sub_dual(s5[19], s5[20]); 
6498b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[21] = highbd_idct_sub_dual(s5[18], s5[21]); 6508b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[22] = highbd_idct_sub_dual(s4[17], s4[22]); 6518b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[23] = highbd_idct_sub_dual(s4[16], s4[23]); 6528b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[24] = highbd_idct_sub_dual(s4[31], s4[24]); 6538b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[25] = highbd_idct_sub_dual(s4[30], s4[25]); 6548b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[26] = highbd_idct_sub_dual(s5[29], s5[26]); 6558b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[27] = highbd_idct_sub_dual(s5[28], s5[27]); 6568b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[28] = highbd_idct_add_dual(s5[27], s5[28]); 6578b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[29] = highbd_idct_add_dual(s5[26], s5[29]); 6588b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[30] = highbd_idct_add_dual(s4[25], s4[30]); 6598b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s6[31] = highbd_idct_add_dual(s4[24], s4[31]); 6608b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6618b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // stage 7 6628b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[0] = highbd_idct_add_dual(s6[0], s5[15]); 6638b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[1] = highbd_idct_add_dual(s6[1], s5[14]); 6648b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[2] = highbd_idct_add_dual(s6[2], s6[13]); 6658b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[3] = highbd_idct_add_dual(s6[3], s6[12]); 6668b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[4] = highbd_idct_add_dual(s6[4], s6[11]); 6678b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[5] = highbd_idct_add_dual(s6[5], s6[10]); 6688b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[6] = highbd_idct_add_dual(s6[6], s5[9]); 6698b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[7] = 
highbd_idct_add_dual(s6[7], s5[8]); 6708b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[8] = highbd_idct_sub_dual(s6[7], s5[8]); 6718b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[9] = highbd_idct_sub_dual(s6[6], s5[9]); 6728b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[10] = highbd_idct_sub_dual(s6[5], s6[10]); 6738b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[11] = highbd_idct_sub_dual(s6[4], s6[11]); 6748b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[12] = highbd_idct_sub_dual(s6[3], s6[12]); 6758b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[13] = highbd_idct_sub_dual(s6[2], s6[13]); 6768b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[14] = highbd_idct_sub_dual(s6[1], s5[14]); 6778b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[15] = highbd_idct_sub_dual(s6[0], s5[15]); 6788b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6798b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[20] = sub_multiply_shift_and_narrow_s32_dual(s6[27], s6[20], cospi_16_64); 6808b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[27] = add_multiply_shift_and_narrow_s32_dual(s6[20], s6[27], cospi_16_64); 6818b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6828b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[21] = sub_multiply_shift_and_narrow_s32_dual(s6[26], s6[21], cospi_16_64); 6838b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[26] = add_multiply_shift_and_narrow_s32_dual(s6[21], s6[26], cospi_16_64); 6848b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6858b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[22] = sub_multiply_shift_and_narrow_s32_dual(s6[25], s6[22], cospi_16_64); 6868b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[25] = add_multiply_shift_and_narrow_s32_dual(s6[22], s6[25], cospi_16_64); 6878b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6888b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[23] = sub_multiply_shift_and_narrow_s32_dual(s6[24], s6[23], cospi_16_64); 
6898b92989c89bec8632aa47dc58dc162f199d62edcJames Zern s7[24] = add_multiply_shift_and_narrow_s32_dual(s6[23], s6[24], cospi_16_64); 6908b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 6918b92989c89bec8632aa47dc58dc162f199d62edcJames Zern // final stage 6928b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[0] = highbd_idct_add_dual(s7[0], s6[31]); 6938b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[1] = highbd_idct_add_dual(s7[1], s6[30]); 6948b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[2] = highbd_idct_add_dual(s7[2], s6[29]); 6958b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[3] = highbd_idct_add_dual(s7[3], s6[28]); 6968b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[4] = highbd_idct_add_dual(s7[4], s7[27]); 6978b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[5] = highbd_idct_add_dual(s7[5], s7[26]); 6988b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[6] = highbd_idct_add_dual(s7[6], s7[25]); 6998b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[7] = highbd_idct_add_dual(s7[7], s7[24]); 7008b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[8] = highbd_idct_add_dual(s7[8], s7[23]); 7018b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[9] = highbd_idct_add_dual(s7[9], s7[22]); 7028b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[10] = highbd_idct_add_dual(s7[10], s7[21]); 7038b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[11] = highbd_idct_add_dual(s7[11], s7[20]); 7048b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[12] = highbd_idct_add_dual(s7[12], s6[19]); 7058b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[13] = highbd_idct_add_dual(s7[13], s6[18]); 7068b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[14] = highbd_idct_add_dual(s7[14], s6[17]); 7078b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[15] = highbd_idct_add_dual(s7[15], s6[16]); 7088b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[16] = highbd_idct_sub_dual(s7[15], s6[16]); 
7098b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[17] = highbd_idct_sub_dual(s7[14], s6[17]); 7108b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[18] = highbd_idct_sub_dual(s7[13], s6[18]); 7118b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[19] = highbd_idct_sub_dual(s7[12], s6[19]); 7128b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[20] = highbd_idct_sub_dual(s7[11], s7[20]); 7138b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[21] = highbd_idct_sub_dual(s7[10], s7[21]); 7148b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[22] = highbd_idct_sub_dual(s7[9], s7[22]); 7158b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[23] = highbd_idct_sub_dual(s7[8], s7[23]); 7168b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[24] = highbd_idct_sub_dual(s7[7], s7[24]); 7178b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[25] = highbd_idct_sub_dual(s7[6], s7[25]); 7188b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[26] = highbd_idct_sub_dual(s7[5], s7[26]); 7198b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[27] = highbd_idct_sub_dual(s7[4], s7[27]); 7208b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[28] = highbd_idct_sub_dual(s7[3], s6[28]); 7218b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[29] = highbd_idct_sub_dual(s7[2], s6[29]); 7228b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[30] = highbd_idct_sub_dual(s7[1], s6[30]); 7238b92989c89bec8632aa47dc58dc162f199d62edcJames Zern out[31] = highbd_idct_sub_dual(s7[0], s6[31]); 7248b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 7258b92989c89bec8632aa47dc58dc162f199d62edcJames Zern highbd_idct16x16_add_store(out, output, stride, bd); 7268b92989c89bec8632aa47dc58dc162f199d62edcJames Zern highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd); 7278b92989c89bec8632aa47dc58dc162f199d62edcJames Zern} 7288b92989c89bec8632aa47dc58dc162f199d62edcJames Zern 7298b92989c89bec8632aa47dc58dc162f199d62edcJames Zernvoid 
vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint16_t *dest,
                                  int stride, int bd) {
  // Two-pass driver. The first pass fills a 32 * 16 element scratch buffer
  // with two *_idct32_12_neon() calls (the second one reads from
  // input + 32 * 8 and writes from scratch + 8). The second pass makes four
  // *_idct32_16_neon() calls, each starting 16 * 8 scratch elements after the
  // previous one and 8 elements further along dest.
  //
  // bd == 8 selects the int16_t scratch buffer and the non-highbd helpers;
  // every other bd selects the int32_t scratch buffer and the highbd helpers.
  int strip;

  if (bd != 8) {
    int32_t scratch[32 * 16];
    int32_t *src = scratch;

    vpx_highbd_idct32_12_neon(input, scratch);
    vpx_highbd_idct32_12_neon(input + 32 * 8, scratch + 8);

    for (strip = 0; strip < 4; ++strip) {
      vpx_highbd_idct32_16_neon(src, dest, stride, bd);
      src += 16 * 8;
      dest += 8;
    }
  } else {
    int16_t scratch[32 * 16];
    int16_t *src = scratch;

    vpx_idct32_12_neon(input, scratch);
    vpx_idct32_12_neon(input + 32 * 8, scratch + 8);

    for (strip = 0; strip < 4; ++strip) {
      // The trailing 1 is passed as a literal; presumably it tells the helper
      // that dest holds 16-bit (high bit depth) pixels -- TODO confirm against
      // the helper's declaration.
      vpx_idct32_16_neon(src, dest, stride, 1);
      src += 16 * 8;
      dest += 8;
    }
  }
}