1b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org/* 2b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * 4b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Use of this source code is governed by a BSD-style license 5b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * that can be found in the LICENSE file in the root of the source 6b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * tree. An additional intellectual property rights grant can be found 7b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * in the file PATENTS. All contributing project authors may 8b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * be found in the AUTHORS file in the root of the source tree. 9b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org */ 10b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 119fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/modules/audio_processing/ns/nsx_core.h" 12b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 13b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org#include <arm_neon.h> 14b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org#include <assert.h> 15b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 16267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org// Constants to compensate for shifting signal log(2^shifts). 
173f6d5e0bded85b8b0d055da8fa49e8d7137fe8edpbos@webrtc.orgconst int16_t WebRtcNsx_kLogTable[9] = { 18267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 19267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org}; 20267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 213f6d5e0bded85b8b0d055da8fa49e8d7137fe8edpbos@webrtc.orgconst int16_t WebRtcNsx_kCounterDiv[201] = { 22267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, 23267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, 24267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, 25267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, 26267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, 27267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, 28267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, 29267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 30267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, 31267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, 32267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, 33267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 
189, 188, 34267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 35267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 36267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org}; 37267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 383f6d5e0bded85b8b0d055da8fa49e8d7137fe8edpbos@webrtc.orgconst int16_t WebRtcNsx_kLogTableFrac[256] = { 39267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 40267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 41267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 42267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, 43267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, 44267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 45267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 46267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 47267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 48267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, 49267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, 50267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, 
51267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, 52267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, 53267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, 54267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, 55267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 56267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org}; 57267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org 58b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// Update the noise estimation information. 59b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgstatic void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) { 60b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org const int16_t kExp2Const = 11819; // Q13 61b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t* ptr_noiseEstLogQuantile = NULL; 62b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t* ptr_noiseEstQuantile = NULL; 63b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); 64b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t twentyOne32x4 = vdupq_n_s32(21); 65b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); 66b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t constB32x4 = vdupq_n_s32(0x200000); 67b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 68b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, 69b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->magnLen); 
70b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 71b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Guarantee a Q-domain as high as possible and still fit in int16 72b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, 73b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16, 74b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 21); 75b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 76b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); 77b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 78b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], 79b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; 80b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; 81b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { 82b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 83b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, 84b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->noiseEstLogQuantile[offset + i]); 85b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); 86b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); 87b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 88b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac 89b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); 
90b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org v32x4A = vorrq_s32(v32x4A, constB32x4); 91b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 92b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); 93b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org v32x4B = vshrq_n_s32(v32x4B, 21); 94b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 95b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 -= 21;// shift 21 to get result in Q0 96b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org v32x4B = vsubq_s32(v32x4B, twentyOne32x4); 97b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 98b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 += (int16_t) inst->qNoise; 99b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // shift to get result in Q(qNoise) 100b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org v32x4B = vaddq_s32(v32x4B, qNoise32x4); 101b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 102b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // if (tmp16 < 0) { 103b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); 104b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // } else { 105b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); 106b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // } 107b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org v32x4B = vshlq_s32(v32x4A, v32x4B); 108b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 109b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); 110b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org v16x4 = vqmovn_s32(v32x4B); 111b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 112b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org //inst->noiseEstQuantile[i] = tmp16; 
113b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org vst1_s16(ptr_noiseEstQuantile, v16x4); 114b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 115b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 116b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Last iteration: 117b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 118b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->quantile[i]=exp(inst->lquantile[offset+i]); 119b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // in Q21 120b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32_t tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, 121b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *ptr_noiseEstLogQuantile); 122b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac 123b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 124b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); 125b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16 -= 21;// shift 21 to get result in Q0 126b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) 127b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (tmp16 < 0) { 128b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16); 129b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 130b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16); 131b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 132b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1); 133b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 134b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 
135b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// Noise Estimation 136b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgvoid WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst, 137b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org uint16_t* magn, 138b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org uint32_t* noise, 139b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t* q_noise) { 140b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; 141b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t countProd, delta, zeros, frac; 142b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; 143b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org const int16_t log2_const = 22713; 144b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org const int16_t width_factor = 21845; 145b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 146b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int i, s, offset; 147b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 148b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tabind = inst->stages - inst->normData; 149b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(tabind < 9); 150b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(tabind > -9); 151b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (tabind < 0) { 152b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org logval = -WebRtcNsx_kLogTable[-tabind]; 153b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 154b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org logval = WebRtcNsx_kLogTable[tabind]; 155b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 156b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 157b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t logval_16x8 = vdupq_n_s16(logval); 
158b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 159b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) 160b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // magn is in Q(-stages), and the real lmagn values are: 161b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) 162b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // lmagn in Q8 163b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (i = 0; i < inst->magnLen; i++) { 164b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (magn[i]) { 165b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); 166b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org frac = (int16_t)((((uint32_t)magn[i] << zeros) 167b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org & 0x7FFFFFFF) >> 23); 168b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(frac < 256); 169b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // log2(magn(i)) 170b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org log2 = (int16_t)(((31 - zeros) << 8) 171b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org + WebRtcNsx_kLogTableFrac[frac]); 172b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // log2(magn(i))*log(2) 173b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org lmagn[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15); 174b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // + log(2^stages) 175b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org lmagn[i] += logval; 176b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 177b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org lmagn[i] = logval; 178b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 179b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 
180b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 181b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x4_t Q3_16x4 = vdup_n_s16(3); 182b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); 183b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); 184b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 185b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t factor = FACTOR_Q7; 186b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->blockIndex < END_STARTUP_LONG) 187b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org factor = FACTOR_Q7_STARTUP; 188b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 189b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Loop over simultaneous estimates 190b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (s = 0; s < SIMULT; s++) { 191b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org offset = s * inst->magnLen; 192b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 193b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Get counter values from state 194b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org counter = inst->noiseEstCounter[s]; 195b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(counter < 201); 196b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org countDiv = WebRtcNsx_kCounterDiv[counter]; 197b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org countProd = (int16_t)WEBRTC_SPL_MUL_16_16(counter, countDiv); 198b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 199b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // quant_est(...) 
200b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16_t deltaBuff[8]; 201b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x4_t tmp16x4_0; 202b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x4_t tmp16x4_1; 203b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); 204b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t countProd_16x8 = vdupq_n_s16(countProd); 205b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv); 206b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0); 207b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t tmp16x8_1; 208b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t tmp16x8_2; 209b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t tmp16x8_3; 210b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Initialize tmp16x8_4 to zero to avoid compilaton error. 211b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t tmp16x8_4 = vdupq_n_s16(0); 212b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int16x8_t tmp16x8_5; 213b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int32x4_t tmp32x4; 214b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 215b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (i = 0; i < inst->magnLen - 7; i += 8) { 216b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Compute delta. 217b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Smaller step size during startup. This prevents from using 218b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // unrealistic values causing overflow. 
219b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vdupq_n_s16(factor); 220b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org vst1q_s16(deltaBuff, tmp16x8_0); 221b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 222b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int j; 223b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (j = 0; j < 8; j++) { 224b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->noiseEstDensity[offset + i + j] > 512) { 225b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Get values for deltaBuff by shifting intead of dividing. 226b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]); 227b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor)); 228b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 229b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 230b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 231b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Update log quantile estimate 232b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 233b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); 234b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4); 235b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x4_1 = vshrn_n_s32(tmp32x4, 14); 236b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4); 237b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x4_0 = vshrn_n_s32(tmp32x4, 14); 238b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines. 
239b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 240b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // prepare for the "if" branch 241b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 += 2; 242b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16_1 = (Word16)(tmp16>>2); 243b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2); 244b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 245b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->noiseEstLogQuantile[offset+i] + tmp16_1; 246b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep 247b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines 248b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 249b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Prepare for the "else" branch 250b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16 += 1; 251b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16_1 = (Word16)(tmp16>>1); 252b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1); 253b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 254b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1); 255b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4); 256b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x4_1 = vshrn_n_s32(tmp32x4, 1); 257b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 258b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1); 259b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4); 
260b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x4_0 = vshrn_n_s32(tmp32x4, 1); 261b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 262b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->noiseEstLogQuantile[offset + i] - tmp16_2; 263b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep 264b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0); 265b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 266b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // logval is the smallest fixed point representation we can have. Values 267b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // below that will correspond to values in the interval [0, 1], which 268b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // can't possibly occur. 269b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8); 270b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 271b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Do the if-else branches: 272b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines 273b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2); 274b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5)); 275b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__("vbit %q0, %q1, %q2":: 276b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4)); 277b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__("vbif %q0, %q1, %q2":: 278b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4)); 279b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2); 280b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 281b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Update density estimate 282b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // tmp16_1 + tmp16_2 283b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]); 284b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8); 285b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8); 286b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 287b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // lmagn[i] - inst->noiseEstLogQuantile[offset + i] 288b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2); 289b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_3 = vabsq_s16(tmp16x8_3); 290b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3); 291b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__("vbit %q0, %q1, %q2":: 292b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4)); 293b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1); 2943b89e10f31160da35b408fd00cb8f89d2b08862dpbos@webrtc.org } // End loop over magnitude spectrum 295b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 296b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Last iteration over magnitude spectrum: 297b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // compute delta 298b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->noiseEstDensity[offset + i] > 512) { 299b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Get values for deltaBuff by shifting intead of dividing. 
300b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]); 301b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org delta = (int16_t)(FACTOR_Q16 >> (14 - factor)); 302b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 303b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org delta = FACTOR_Q7; 304b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->blockIndex < END_STARTUP_LONG) { 305b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Smaller step size during startup. This prevents from using 306b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // unrealistic values causing overflow. 307b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org delta = FACTOR_Q7_STARTUP; 308b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 309b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 310b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // update log quantile estimate 311b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); 312b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { 313b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 314b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // CounterDiv=1/(inst->counter[s]+1) in Q15 315b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16 += 2; 316b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); 317b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstLogQuantile[offset + i] += tmp16no1; 318b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 319b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16 += 1; 320b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16no1 = 
WEBRTC_SPL_RSHIFT_W16(tmp16, 1); 321b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 322b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); 323b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstLogQuantile[offset + i] -= tmp16no2; 324b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->noiseEstLogQuantile[offset + i] < logval) { 325b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // logval is the smallest fixed point representation we can have. 326b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Values below that will correspond to values in the interval 327b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // [0, 1], which can't possibly occur. 328b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstLogQuantile[offset + i] = logval; 329b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 330b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 331b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 332b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // update density estimate 333b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) 334b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org < WIDTH_Q8) { 335b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( 336b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstDensity[offset + i], countProd, 15); 337b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( 338b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org width_factor, countDiv, 15); 339b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; 
340b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 341b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 342b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 343b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (counter >= END_STARTUP_LONG) { 344b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstCounter[s] = 0; 345b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->blockIndex >= END_STARTUP_LONG) { 346b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org UpdateNoiseEstimateNeon(inst, offset); 347b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 348b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 349b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org inst->noiseEstCounter[s]++; 350b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 3513b89e10f31160da35b408fd00cb8f89d2b08862dpbos@webrtc.org } // end loop over simultaneous estimates 352b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 353b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Sequentially update the noise during startup 354b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (inst->blockIndex < END_STARTUP_LONG) { 355b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org UpdateNoiseEstimateNeon(inst, offset); 356b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 357b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 358b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (i = 0; i < inst->magnLen; i++) { 359b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) 360b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 361b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org (*q_noise) = (int16_t)inst->qNoise; 362b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 363b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 364b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// 
// Filter the data in the frequency domain, and create spectrum.
//
// Step (1) scales inst->real[] and inst->imag[] in place by
// inst->noiseSupFilter[] (product shifted right by 14 bits).
// Step (2) writes interleaved (real, -imag) pairs to |freq_buf|; the last
// element written is freq_buf[inst->anaLen + 1].
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
  assert(inst->magnLen % 8 == 1);
  assert(inst->anaLen2 % 16 == 0);

  // (1) Filtering.

  // Fixed point C code for the next block is as follows:
  // for (i = 0; i < inst->magnLen; i++) {
  //   inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
  //      (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
  //   inst->imag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i],
  //      (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
  // }

  int16_t* preal = &inst->real[0];
  int16_t* pimag = &inst->imag[0];
  int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0];
  // Because magnLen % 8 == 1, this bound makes the vector loop stop with
  // exactly one element left; that element is handled in scalar code below.
  int16_t* pimag_end = pimag + inst->magnLen - 4;

  while (pimag < pimag_end) {
    int16x8_t real = vld1q_s16(preal);
    int16x8_t imag = vld1q_s16(pimag);
    int16x8_t ns_filter = vld1q_s16(pns_filter);

    // Widening 16x16 -> 32 bit multiplies, four lanes at a time.
    int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter));
    int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter));
    int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real),
                                  vget_high_s16(ns_filter));
    int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag),
                                  vget_high_s16(ns_filter));

    // Shift right by 14 and narrow back to 16 bits (no rounding, no
    // saturation), matching the (int16_t) cast in the C code above.
    int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14);
    int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14);
    int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14);
    int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14);

    vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1));
    vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1));
    preal += 8;
    pimag += 8;
    pns_filter += 8;
  }

  // Filter the last element
  *preal = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*preal, *pns_filter, 14);
  *pimag = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*pimag, *pns_filter, 14);

  // (2) Create spectrum.

  // Fixed point C code for the rest of the function is as follows:
  // freq_buf[0] = inst->real[0];
  // freq_buf[1] = -inst->imag[0];
  // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
  //   freq_buf[j] = inst->real[i];
  //   freq_buf[j + 1] = -inst->imag[i];
  // }
  // freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
  // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];

  preal = &inst->real[0];
  pimag = &inst->imag[0];
  pimag_end = pimag + inst->anaLen2;
  int16_t * freq_buf_start = freq_buf;
  while (pimag < pimag_end) {
    // loop unroll: 16 (real, imag) pairs per iteration.
    int16x8x2_t real_imag_0;
    int16x8x2_t real_imag_1;
    real_imag_0.val[1] = vld1q_s16(pimag);
    real_imag_0.val[0] = vld1q_s16(preal);
    preal += 8;
    pimag += 8;
    real_imag_1.val[1] = vld1q_s16(pimag);
    real_imag_1.val[0] = vld1q_s16(preal);
    preal += 8;
    pimag += 8;

    real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]);
    real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]);
    // vst2q interleaves val[0] and val[1]: real, -imag, real, -imag, ...
    vst2q_s16(freq_buf_start, real_imag_0);
    freq_buf_start += 16;
    vst2q_s16(freq_buf_start, real_imag_1);
    freq_buf_start += 16;
  }
  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
}

// Denormalize the input buffer.
//
// Takes every second value of |in| (the first value of each interleaved
// pair), shifts it by (factor - inst->normData) bits -- left when the amount
// is positive, right when it is negative -- and stores the result, saturated
// to 16 bits, in inst->real[0 .. inst->anaLen - 1].
//
// Implemented with intrinsics rather than inline assembly: the shift vector
// must stay live across the whole loop, and a register set in one asm
// statement is not guaranteed to keep its value until a later asm statement.
void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
  // Eight output values are produced per loop iteration.
  assert(inst->anaLen % 8 == 0);

  int16_t* ptr_real = &inst->real[0];
  int16_t* const ptr_real_end = &inst->real[inst->anaLen];
  const int16_t* ptr_in = &in[0];
  // vshlq_s32 shifts left for positive lane values and right for negative
  // ones, so a single vector covers both directions.
  const int32x4_t shift = vdupq_n_s32((int32_t)(factor - inst->normData));

  while (ptr_real < ptr_real_end) {
    // Loop unrolled once. Both pointers are incremented.

    // tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
    //                              factor - inst->normData);
    // inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
    // vld2 de-interleaves: val[0] holds in[0], in[2], in[4], in[6].
    int16x4x2_t pairs = vld2_s16(ptr_in);
    int32x4_t tmp32 = vshlq_s32(vmovl_s16(pairs.val[0]), shift);
    vst1_s16(ptr_real, vqmovn_s32(tmp32));
    ptr_in += 8;
    ptr_real += 4;

    pairs = vld2_s16(ptr_in);
    tmp32 = vshlq_s32(vmovl_s16(pairs.val[0]), shift);
    vst1_s16(ptr_real, vqmovn_s32(tmp32));
    ptr_in += 8;
    ptr_real += 4;
  }
}

// Windows four samples of |real| with |window| (product rounded and shifted
// right by 14 bits), multiplies by |gain| (rounded, shifted right by 13 bits,
// saturated to 16 bits) and adds the result, with saturation, to the four
// samples stored at |syn|.
static inline void OverlapAddFourNeon(const int16_t* real,
                                      const int16_t* window,
                                      int16x4_t gain,
                                      int16_t* syn) {
  // tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
  //     inst->window[i], inst->real[i], 14); // Q0, window in Q14
  const int16x4_t tmp16a =
      vrshrn_n_s32(vmull_s16(vld1_s16(real), vld1_s16(window)), 14);
  // tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13);
  // tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
  const int16x4_t tmp16b = vqrshrn_n_s32(vmull_s16(gain, tmp16a), 13);
  // inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(
  //     inst->synthesisBuffer[i], tmp16b); // Q0
  vst1_s16(syn, vqadd_s16(vld1_s16(syn), tmp16b));
}

// For the noise suppress process, synthesis, read out fully processed
// segment, and update synthesis buffer.
//
// 1. Adds the windowed, gain-scaled inst->real[] into inst->synthesisBuffer[].
// 2. Copies the first inst->blockLen10ms samples of the synthesis buffer to
//    |out_frame|.
// 3. Moves the remaining samples to the front of the synthesis buffer and
//    zeroes the tail.
//
// Implemented with intrinsics rather than inline assembly: the gain and zero
// vectors must stay live across whole loops, and a register set in one asm
// statement is not guaranteed to keep its value until a later asm statement.
void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
                                   int16_t* out_frame,
                                   int16_t gain_factor) {
  // The copy and zero loops below move 16 samples per iteration.
  assert(inst->anaLen % 16 == 0);
  assert(inst->blockLen10ms % 16 == 0);

  int16_t* const ptr_syn_end = &inst->synthesisBuffer[inst->anaLen];
  const int16_t* ptr_real = &inst->real[0];
  const int16_t* ptr_window = &inst->window[0];
  int16_t* ptr_syn = &inst->synthesisBuffer[0];

  // synthesis
  const int16x4_t gain = vdup_n_s16(gain_factor);
  // Loop unrolled once: eight samples per iteration.
  while (ptr_syn < ptr_syn_end) {
    OverlapAddFourNeon(ptr_real, ptr_window, gain, ptr_syn);
    OverlapAddFourNeon(ptr_real + 4, ptr_window + 4, gain, ptr_syn + 4);
    ptr_real += 8;
    ptr_window += 8;
    ptr_syn += 8;
  }

  // read out fully processed segment
  int16_t* ptr_out = &out_frame[0];
  ptr_syn = &inst->synthesisBuffer[0];
  while (ptr_syn < &inst->synthesisBuffer[inst->blockLen10ms]) {
    // out_frame[i] = inst->synthesisBuffer[i]; // Q0
    const int16x8_t first = vld1q_s16(ptr_syn);
    const int16x8_t second = vld1q_s16(ptr_syn + 8);
    vst1q_s16(ptr_out, first);
    vst1q_s16(ptr_out + 8, second);
    ptr_syn += 16;
    ptr_out += 16;
  }

  // Update synthesis buffer.
  // C code:
  // WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
  //                      inst->synthesisBuffer + inst->blockLen10ms,
  //                      inst->anaLen - inst->blockLen10ms);
  ptr_out = &inst->synthesisBuffer[0];
  ptr_syn = &inst->synthesisBuffer[inst->blockLen10ms];
  while (ptr_syn < ptr_syn_end) {
    // Source and destination are in the same buffer, so both vectors are
    // loaded before anything is stored.
    const int16x8_t first = vld1q_s16(ptr_syn);
    const int16x8_t second = vld1q_s16(ptr_syn + 8);
    vst1q_s16(ptr_out, first);
    vst1q_s16(ptr_out + 8, second);
    ptr_syn += 16;
    ptr_out += 16;
  }

  // C code:
  // WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
  //    + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
  // ptr_out continues from where the move above stopped.
  const int16x8_t zeros = vdupq_n_s16(0);
  while (ptr_out < ptr_syn_end) {
    vst1q_s16(ptr_out, zeros);
    vst1q_s16(ptr_out + 8, zeros);
    ptr_out += 16;
  }
}

// Update analysis buffer for lower band, and window data before FFT.
void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
                                  int16_t* out,
                                  int16_t* new_speech) {

  int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms];
  int16_t* ptr_out = &inst->analysisBuffer[0];

  // For lower band update analysis buffer.
611b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, 612b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->analysisBuffer + inst->blockLen10ms, 613b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->anaLen - inst->blockLen10ms); 614b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (; ptr_out < &inst->analysisBuffer[inst->anaLen - inst->blockLen10ms];) { 615b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Loop unrolled once, so both pointers are incremented by 8 twice. 616b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__ __volatile__( 617b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t" 618b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" 619b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t" 620b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" 621b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org :[ptr_ana]"+r"(ptr_ana), 622b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org [ptr_out]"+r"(ptr_out) 623b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org : 624b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org :"d20", "d21", "d22", "d23" 625b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ); 626b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 627b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 628b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer 629b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms); 630b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (ptr_ana = new_speech; ptr_out < &inst->analysisBuffer[inst->anaLen];) { 631b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 
// Loop unrolled once, so both pointers are incremented by 8 twice. 632b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__ __volatile__( 633b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 {d20, d21}, [%[ptr_ana]]!\n\t" 634b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vst1.16 {d20, d21}, [%[ptr_out]]!\n\t" 635b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 {d22, d23}, [%[ptr_ana]]!\n\t" 636b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vst1.16 {d22, d23}, [%[ptr_out]]!\n\t" 637b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org :[ptr_ana]"+r"(ptr_ana), 638b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org [ptr_out]"+r"(ptr_out) 639b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org : 640b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org :"d20", "d21", "d22", "d23" 641b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ); 642b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 643b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 644b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Window data before FFT 645b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org const int16_t* ptr_window = &inst->window[0]; 646b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ptr_out = &out[0]; 647b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org ptr_ana = &inst->analysisBuffer[0]; 648b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org for (; ptr_out < &out[inst->anaLen];) { 649b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 650b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Loop unrolled once, so all pointers are incremented by 4 twice. 
651b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org __asm__ __volatile__( 652b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 d20, [%[ptr_ana]]!\n\t" 653b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 d21, [%[ptr_window]]!\n\t" 654b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( 655b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->window[i], inst->analysisBuffer[i], 14); // Q0 656b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vmull.s16 q10, d20, d21\n\t" 657b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vrshrn.i32 d20, q10, #14\n\t" 658b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vst1.16 d20, [%[ptr_out]]!\n\t" 659b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 660b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 d22, [%[ptr_ana]]!\n\t" 661b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vld1.16 d23, [%[ptr_window]]!\n\t" 662b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( 663b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // inst->window[i], inst->analysisBuffer[i], 14); // Q0 664b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vmull.s16 q11, d22, d23\n\t" 665b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vrshrn.i32 d22, q11, #14\n\t" 666b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org "vst1.16 d22, [%[ptr_out]]!\n\t" 667b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 668b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // Specify constraints. 
:[ptr_ana]"+r"(ptr_ana),
 [ptr_window]"+r"(ptr_window),
 [ptr_out]"+r"(ptr_out)
:
:"d20", "d21", "d22", "d23", "q10", "q11"
    );
  }
}

// Create a complex number buffer (out[]) as the input (in[]) interleaved with
// zeros, and normalize it.
//
// For every i in [0, inst->anaLen):
//   out[2 * i]     = in[i] shifted by inst->normData   // Q(normData)
//   out[2 * i + 1] = 0                                 // Imaginary part
//
// Input:
//   - inst : Instance; only normData and anaLen are read here.
//   - in   : Real-valued input samples.
//
// Output:
//   - out  : Interleaved (real, 0) pairs; two values are written per input
//            sample.
//
// The loop consumes 16 input samples per pass and only tests its bound between
// passes, so inst->anaLen is expected to be a multiple of 16; otherwise the
// last pass runs beyond in[inst->anaLen].
//
// NEON intrinsics are used instead of stand-alone inline assembly so that the
// compiler owns register allocation for the shift and zero vectors and sees
// every load and store. Vector registers initialised in one asm statement are
// not guaranteed to survive until a later asm statement.
void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
                                       int16_t* in,
                                       int16_t* out) {
  const int16_t* ptr_in = &in[0];
  const int16_t* const in_end = &in[inst->anaLen];
  int16_t* ptr_out = &out[0];

  // Per-lane shift amount: the low 16 bits of normData in all eight lanes.
  const int16x8_t norm_shift = vdupq_n_s16((int16_t)inst->normData);

  // val[0] holds the normalized real parts, val[1] the all-zero imaginary
  // parts; vst2q_s16 interleaves the two when storing.
  int16x8x2_t complex_pairs;
  complex_pairs.val[1] = vdupq_n_s16(0);

  while (ptr_in < in_end) {
    // Loop unrolled once: 8 input samples -> 16 output values, twice.

    // out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
    // out[j + 1] = 0; // Insert zeros in imaginary part
    complex_pairs.val[0] = vshlq_s16(vld1q_s16(ptr_in), norm_shift);
    vst2q_s16(ptr_out, complex_pairs);
    ptr_in += 8;
    ptr_out += 16;

    complex_pairs.val[0] = vshlq_s16(vld1q_s16(ptr_in), norm_shift);
    vst2q_s16(ptr_out, complex_pairs);
    ptr_in += 8;
    ptr_out += 16;
  }
}