16b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org/* 26b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 36b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * 46b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * Use of this source code is governed by a BSD-style license 56b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * that can be found in the LICENSE file in the root of the source 66b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * tree. An additional intellectual property rights grant can be found 76b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * in the file PATENTS. All contributing project authors may 86b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * be found in the AUTHORS file in the root of the source tree. 96b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org */ 106b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 116b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org/* This file contains WebRtcIsacfix_MatrixProduct1Neon() and 126b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * WebRtcIsacfix_MatrixProduct2Neon() for ARM Neon platform. API's are in 136b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * entropy_coding.c. Results are bit exact with the c code for 146b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org * generic platforms. 156b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org */ 166b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 176b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org#include "entropy_coding.h" 186b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 196b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org#include <arm_neon.h> 206b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org#include <assert.h> 216b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org#include <stddef.h> 226b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 236b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org#include "signal_processing_library.h" 246b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 256b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.orgvoid WebRtcIsacfix_MatrixProduct1Neon(const int16_t matrix0[], 266b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int32_t matrix1[], 276b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32_t matrix_product[], 286b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix1_index_factor1, 296b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix0_index_factor1, 306b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix1_index_init_case, 316b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix1_index_step, 326b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix0_index_step, 336b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int inner_loop_count, 346b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int mid_loop_count, 356b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int shift) { 366b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int j = 0, k = 0, n = 0; 376b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0; 386b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int* matrix1_index_factor2 = &j; 396b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int* matrix0_index_factor2 = &k; 406b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org if (matrix1_index_init_case != 0) { 416b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index_factor2 = &k; 426b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index_factor2 = &j; 436b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 446b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t shift32x4 = vdupq_n_s32(shift); 456b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t shift32x2 = vdup_n_s32(shift); 460c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x4_t sum_32x4 = vdupq_n_s32(0); 470c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x2_t sum_32x2 = vdup_n_s32(0); 486b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 496b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org assert(inner_loop_count % 2 == 0); 506b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org assert(mid_loop_count % 2 == 0); 516b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 526b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org if (matrix1_index_init_case != 0 && matrix1_index_factor1 == 1) { 536b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (j = 0; j < SUBFRAMES; j++) { 546b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index = mid_loop_count * j; 556b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (k = 0; k < (mid_loop_count >> 2) << 2; k += 4) { 560c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org sum_32x4 = veorq_s32(sum_32x4, sum_32x4); // Initialize to zeros. 576b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = k; 586b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = matrix0_index_factor1 * j; 596b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = 0; n < inner_loop_count; n++) { 606b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t matrix0_32x4 = 616b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vdupq_n_s32((int32_t)(matrix0[matrix0_index]) << 15); 626b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t matrix1_32x4 = 636b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vshlq_s32(vld1q_s32(&matrix1[matrix1_index]), shift32x4); 646b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4); 656b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4); 666b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += matrix1_index_step; 676b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += matrix0_index_step; 686b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 696b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vst1q_s32(&matrix_product[matrix_prod_index], sum_32x4); 706b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index += 4; 716b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 726b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org if (mid_loop_count % 4 > 1) { 730c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org sum_32x2 = veor_s32(sum_32x2, sum_32x2); // Initialize to zeros. 746b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = k; 756b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org k += 2; 766b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = matrix0_index_factor1 * j; 776b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = 0; n < inner_loop_count; n++) { 786b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t matrix0_32x2 = 796b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vdup_n_s32((int32_t)(matrix0[matrix0_index]) << 15); 806b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t matrix1_32x2 = 816b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vshl_s32(vld1_s32(&matrix1[matrix1_index]), shift32x2); 826b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t multi_32x2 = vqdmulh_s32(matrix0_32x2, matrix1_32x2); 836b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vqadd_s32(sum_32x2, multi_32x2); 846b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += matrix1_index_step; 856b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += matrix0_index_step; 866b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 876b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vst1_s32(&matrix_product[matrix_prod_index], sum_32x2); 886b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index += 2; 896b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 906b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 916b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 926b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org else if (matrix1_index_init_case == 0 && matrix0_index_factor1 == 1) { 930c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x2_t multi_32x2 = vdup_n_s32(0); 940c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x2_t matrix0_32x2 = vdup_n_s32(0); 956b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (j = 0; j < SUBFRAMES; j++) { 966b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index = mid_loop_count * j; 976b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (k = 0; k < (mid_loop_count >> 2) << 2; k += 4) { 980c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org sum_32x4 = veorq_s32(sum_32x4, sum_32x4); // Initialize to zeros. 996b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = matrix1_index_factor1 * j; 1006b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = k; 1016b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = 0; n < inner_loop_count; n++) { 1026b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t matrix1_32x4 = vdupq_n_s32(matrix1[matrix1_index] << shift); 1036b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t matrix0_32x4 = 1046b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vshll_n_s16(vld1_s16(&matrix0[matrix0_index]), 15); 1056b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4); 1066b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4); 1076b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += matrix1_index_step; 1086b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += matrix0_index_step; 1096b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1106b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vst1q_s32(&matrix_product[matrix_prod_index], sum_32x4); 1116b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index += 4; 1126b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1136b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org if (mid_loop_count % 4 > 1) { 1140c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org sum_32x2 = veor_s32(sum_32x2, sum_32x2); // Initialize to zeros. 1156b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = matrix1_index_factor1 * j; 1166b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = k; 1176b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = 0; n < inner_loop_count; n++) { 1186b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t matrix1_32x2 = vdup_n_s32(matrix1[matrix1_index] << shift); 1190c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org matrix0_32x2 = 1206b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vset_lane_s32((int32_t)matrix0[matrix0_index], matrix0_32x2, 0); 1216b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_32x2 = vset_lane_s32((int32_t)matrix0[matrix0_index + 1], 1226b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_32x2, 1); 1236b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_32x2 = vshl_n_s32(matrix0_32x2, 15); 1246b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org multi_32x2 = vqdmulh_s32(matrix1_32x2, matrix0_32x2); 1256b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vqadd_s32(sum_32x2, multi_32x2); 1266b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += matrix1_index_step; 1276b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += matrix0_index_step; 1286b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1296b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vst1_s32(&matrix_product[matrix_prod_index], sum_32x2); 1306b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index += 2; 1316b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1326b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1336b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1346b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org else if (matrix1_index_init_case == 0 && 1356b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index_step == 1 && 1366b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index_step == 1) { 1370c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x2_t multi_32x2 = vdup_n_s32(0); 1380c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x2_t matrix0_32x2 = vdup_n_s32(0); 1396b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (j = 0; j < SUBFRAMES; j++) { 1406b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index = mid_loop_count * j; 1416b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (k = 0; k < mid_loop_count; k++) { 1420c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org sum_32x4 = veorq_s32(sum_32x4, sum_32x4); // Initialize to zeros. 1436b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = matrix1_index_factor1 * j; 1446b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = matrix0_index_factor1 * k; 1456b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = 0; n < (inner_loop_count >> 2) << 2; n += 4) { 1466b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t matrix1_32x4 = 1476b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vshlq_s32(vld1q_s32(&matrix1[matrix1_index]), shift32x4); 1486b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t matrix0_32x4 = 1496b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vshll_n_s16(vld1_s16(&matrix0[matrix0_index]), 15); 1506b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x4_t multi_32x4 = vqdmulhq_s32(matrix0_32x4, matrix1_32x4); 1516b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x4 = vqaddq_s32(sum_32x4, multi_32x4); 1526b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += 4; 1536b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += 4; 1546b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1556b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vqadd_s32(vget_low_s32(sum_32x4), vget_high_s32(sum_32x4)); 1566b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org if (inner_loop_count % 4 > 1) { 1576b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t matrix1_32x2 = 1586b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vshl_s32(vld1_s32(&matrix1[matrix1_index]), shift32x2); 1590c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org matrix0_32x2 = 1606b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vset_lane_s32((int32_t)matrix0[matrix0_index], matrix0_32x2, 0); 1616b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_32x2 = vset_lane_s32((int32_t)matrix0[matrix0_index + 1], 1626b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_32x2, 1); 1636b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_32x2 = vshl_n_s32(matrix0_32x2, 15); 1646b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org multi_32x2 = vqdmulh_s32(matrix1_32x2, matrix0_32x2); 1656b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vqadd_s32(sum_32x2, multi_32x2); 1666b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1676b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vpadd_s32(sum_32x2, sum_32x2); 1686b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vst1_lane_s32(&matrix_product[matrix_prod_index], sum_32x2, 0); 1696b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index++; 1706b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1716b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1726b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1736b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org else { 1746b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (j = 0; j < SUBFRAMES; j++) { 1756b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index = mid_loop_count * j; 1766b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (k=0; k < mid_loop_count; k++) { 1776b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32_t sum32 = 0; 1786b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = matrix1_index_factor1 * (*matrix1_index_factor2); 1796b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = matrix0_index_factor1 * (*matrix0_index_factor2); 1806b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = 0; n < inner_loop_count; n++) { 1816b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum32 += (WEBRTC_SPL_MUL_16_32_RSFT16(matrix0[matrix0_index], 1826b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1[matrix1_index] << shift)); 1836b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += matrix1_index_step; 1846b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += matrix0_index_step; 1856b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1866b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_product[matrix_prod_index] = sum32; 1876b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index++; 1886b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1896b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1906b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 1916b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org} 1926b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org 1936b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.orgvoid WebRtcIsacfix_MatrixProduct2Neon(const int16_t matrix0[], 1946b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int32_t matrix1[], 1956b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32_t matrix_product[], 1966b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix0_index_factor, 1976b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org const int matrix0_index_step) { 1986b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int j = 0, n = 0; 1996b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int matrix1_index = 0, matrix0_index = 0, matrix_prod_index = 0; 2000c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org int32x2_t sum_32x2 = vdup_n_s32(0); 2016b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (j = 0; j < SUBFRAMES; j++) { 2020c46af62e2cc171f324f4d7e518e34f88ce88a6dkma@webrtc.org sum_32x2 = veor_s32(sum_32x2, sum_32x2); // Initialize to zeros. 2036b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index = 0; 2046b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index = matrix0_index_factor * j; 2056b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org for (n = SUBFRAMES; n > 0; n--) { 2066b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t matrix0_32x2 = 2076b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vdup_n_s32((int32_t)(matrix0[matrix0_index]) << 15); 2086b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t matrix1_32x2 = vld1_s32(&matrix1[matrix1_index]); 2096b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org int32x2_t multi_32x2 = vqdmulh_s32(matrix0_32x2, matrix1_32x2); 2106b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vqadd_s32(sum_32x2, multi_32x2); 2116b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix1_index += 2; 2126b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix0_index += matrix0_index_step; 2136b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 2146b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org sum_32x2 = vshr_n_s32(sum_32x2, 3); 2156b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org vst1_s32(&matrix_product[matrix_prod_index], sum_32x2); 2166b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org matrix_prod_index += 2; 2176b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org } 2186b12f9704ef671d4ec2ab14c531c967373c8b137kma@webrtc.org} 219