/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
119fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/modules/audio_processing/ns/nsx_core.h"
12b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
13b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org#include <arm_neon.h>
14b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org#include <assert.h>
15b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
// Constants to compensate for shifting signal log(2^shifts).
// Entry k holds log(2^k) = k * log(2) in Q8 (log(2) * 256 ~= 177.4), for
// shift counts 0 through 8.
const int16_t WebRtcNsx_kLogTable[9] = {
  0, 177, 355, 532, 710, 887, 1065, 1242, 1420
};
20267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org
// Reciprocal table for the noise estimation counter.
// Entry i holds 1 / (i + 1) in Q15, i.e. round(32768 / (i + 1)); entry 0 is
// clamped to 32767 so that it fits in an int16_t. Valid indices are 0..200.
const int16_t WebRtcNsx_kCounterDiv[201] = {
  32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
  2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
  1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
  819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
  596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
  468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
  386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
  328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
  285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
  252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
  226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
  205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
  187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
  172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
};
37267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org
// Fractional part of log2 in Q8.
// Entry i holds round(256 * log2(1 + i / 256)); it is looked up with the top
// eight fraction bits of a normalized magnitude and added to the integer part
// of the logarithm. Valid indices are 0..255.
const int16_t WebRtcNsx_kLogTableFrac[256] = {
  0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
  22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
  44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
  63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
  82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
  100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
  117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
  132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
  147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
  161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
  175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
  188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
  201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
  213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
  225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
  237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
  248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
};
57267a0decd6e8569a0af603fc27e4ddd25d50d13ckma@webrtc.org
58b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// Update the noise estimation information.
59b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgstatic void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) {
60b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  const int16_t kExp2Const = 11819; // Q13
61b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t* ptr_noiseEstLogQuantile = NULL;
62b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t* ptr_noiseEstQuantile = NULL;
63b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
64b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int32x4_t twentyOne32x4 = vdupq_n_s32(21);
65b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
66b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int32x4_t constB32x4 = vdupq_n_s32(0x200000);
67b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
68b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
69b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                        inst->magnLen);
70b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
71b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // Guarantee a Q-domain as high as possible and still fit in int16
72b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
73b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                                                 tmp16,
74b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                                                 21);
75b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
76b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
77b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
78b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
79b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org       ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
80b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org       ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
81b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org       ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
82b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
83b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
84b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    //                                inst->noiseEstLogQuantile[offset + i]);
85b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
86b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
87b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
88b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
89b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
90b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    v32x4A = vorrq_s32(v32x4A, constB32x4);
91b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
92b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
93b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    v32x4B = vshrq_n_s32(v32x4B, 21);
94b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
95b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // tmp16 -= 21;// shift 21 to get result in Q0
96b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
97b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
98b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // tmp16 += (int16_t) inst->qNoise;
99b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // shift to get result in Q(qNoise)
100b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    v32x4B = vaddq_s32(v32x4B, qNoise32x4);
101b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
102b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // if (tmp16 < 0) {
103b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    //   tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
104b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // } else {
105b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    //   tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
106b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // }
107b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    v32x4B = vshlq_s32(v32x4A, v32x4B);
108b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
109b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
110b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    v16x4 = vqmovn_s32(v32x4B);
111b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
112b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    //inst->noiseEstQuantile[i] = tmp16;
113b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    vst1_s16(ptr_noiseEstQuantile, v16x4);
114b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
115b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
116b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // Last iteration:
117b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
118b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // inst->quantile[i]=exp(inst->lquantile[offset+i]);
119b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // in Q21
120b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int32_t tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const,
121b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                          *ptr_noiseEstLogQuantile);
122b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
123b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
124b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21);
125b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  tmp16 -= 21;// shift 21 to get result in Q0
126b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
127b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (tmp16 < 0) {
128b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, -tmp16);
129b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  } else {
130b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, tmp16);
131b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
132b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
133b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
134b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
135b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// Noise Estimation
136b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgvoid WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
137b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                   uint16_t* magn,
138b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                   uint32_t* noise,
139b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                   int16_t* q_noise) {
140b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
141b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t countProd, delta, zeros, frac;
142b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
143b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  const int16_t log2_const = 22713;
144b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  const int16_t width_factor = 21845;
145b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
146b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int i, s, offset;
147b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
148b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  tabind = inst->stages - inst->normData;
149b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  assert(tabind < 9);
150b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  assert(tabind > -9);
151b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (tabind < 0) {
152b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    logval = -WebRtcNsx_kLogTable[-tabind];
153b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  } else {
154b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    logval = WebRtcNsx_kLogTable[tabind];
155b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
156b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
157b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16x8_t logval_16x8 = vdupq_n_s16(logval);
158b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
159b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
160b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // magn is in Q(-stages), and the real lmagn values are:
161b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
162b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // lmagn in Q8
163b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  for (i = 0; i < inst->magnLen; i++) {
164b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    if (magn[i]) {
165b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
166b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      frac = (int16_t)((((uint32_t)magn[i] << zeros)
167b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                        & 0x7FFFFFFF) >> 23);
168b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      assert(frac < 256);
169b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // log2(magn(i))
170b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      log2 = (int16_t)(((31 - zeros) << 8)
171b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                       + WebRtcNsx_kLogTableFrac[frac]);
172b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // log2(magn(i))*log(2)
173b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      lmagn[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2_const, 15);
174b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // + log(2^stages)
175b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      lmagn[i] += logval;
176b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    } else {
177b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      lmagn[i] = logval;
178b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    }
179b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
180b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
181b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16x4_t Q3_16x4  = vdup_n_s16(3);
182b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
183b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
184b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
185b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int16_t factor = FACTOR_Q7;
186b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (inst->blockIndex < END_STARTUP_LONG)
187b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    factor = FACTOR_Q7_STARTUP;
188b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
189b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // Loop over simultaneous estimates
190b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  for (s = 0; s < SIMULT; s++) {
191b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    offset = s * inst->magnLen;
192b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
193b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // Get counter values from state
194b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    counter = inst->noiseEstCounter[s];
195b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    assert(counter < 201);
196b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    countDiv = WebRtcNsx_kCounterDiv[counter];
197b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    countProd = (int16_t)WEBRTC_SPL_MUL_16_16(counter, countDiv);
198b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
199b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // quant_est(...)
200b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t deltaBuff[8];
201b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x4_t tmp16x4_0;
202b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x4_t tmp16x4_1;
203b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
204b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
205b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
206b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
207b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t tmp16x8_1;
208b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t tmp16x8_2;
209b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t tmp16x8_3;
210b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // Initialize tmp16x8_4 to zero to avoid compilaton error.
211b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t tmp16x8_4 = vdupq_n_s16(0);
212b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16x8_t tmp16x8_5;
213b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int32x4_t tmp32x4;
214b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
215b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    for (i = 0; i < inst->magnLen - 7; i += 8) {
216b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Compute delta.
217b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Smaller step size during startup. This prevents from using
218b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // unrealistic values causing overflow.
219b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vdupq_n_s16(factor);
220b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      vst1q_s16(deltaBuff, tmp16x8_0);
221b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
222b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      int j;
223b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      for (j = 0; j < 8; j++) {
224b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        if (inst->noiseEstDensity[offset + i + j] > 512) {
225b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org          // Get values for deltaBuff by shifting intead of dividing.
226b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org          int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]);
227b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org          deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor));
228b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        }
229b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      }
230b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
231b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Update log quantile estimate
232b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
233b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
234b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
235b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
236b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
237b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
238b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
239b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
240b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // prepare for the "if" branch
241b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16 += 2;
242b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16_1 = (Word16)(tmp16>>2);
243b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
244b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
245b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // inst->noiseEstLogQuantile[offset+i] + tmp16_1;
246b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
247b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
248b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
249b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Prepare for the "else" branch
250b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16 += 1;
251b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16_1 = (Word16)(tmp16>>1);
252b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
253b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
254b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
255b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
256b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
257b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
258b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
259b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
260b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
261b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
262b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // inst->noiseEstLogQuantile[offset + i] - tmp16_2;
263b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
264b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
265b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
266b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // logval is the smallest fixed point representation we can have. Values
267b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // below that will correspond to values in the interval [0, 1], which
268b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // can't possibly occur.
269b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
270b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
271b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Do the if-else branches:
272b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
273b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
274b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      __asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
275b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      __asm__("vbit %q0, %q1, %q2"::
276b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org              "w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
277b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      __asm__("vbif %q0, %q1, %q2"::
278b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org              "w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
279b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
280b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
281b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Update density estimate
282b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // tmp16_1 + tmp16_2
283b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
284b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
285b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
286b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
287b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // lmagn[i] - inst->noiseEstLogQuantile[offset + i]
288b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
289b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_3 = vabsq_s16(tmp16x8_3);
290b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
291b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      __asm__("vbit %q0, %q1, %q2"::
292b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org              "w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
293b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
2943b89e10f31160da35b408fd00cb8f89d2b08862dpbos@webrtc.org    }  // End loop over magnitude spectrum
295b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
296b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // Last iteration over magnitude spectrum:
297b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // compute delta
298b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    if (inst->noiseEstDensity[offset + i] > 512) {
299b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // Get values for deltaBuff by shifting intead of dividing.
300b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
301b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
302b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    } else {
303b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      delta = FACTOR_Q7;
304b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      if (inst->blockIndex < END_STARTUP_LONG) {
305b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        // Smaller step size during startup. This prevents from using
306b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        // unrealistic values causing overflow.
307b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        delta = FACTOR_Q7_STARTUP;
308b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      }
309b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    }
310b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // update log quantile estimate
311b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
312b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
313b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
314b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // CounterDiv=1/(inst->counter[s]+1) in Q15
315b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16 += 2;
316b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
317b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      inst->noiseEstLogQuantile[offset + i] += tmp16no1;
318b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    } else {
319b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16 += 1;
320b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
321b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
322b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
323b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
324b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      if (inst->noiseEstLogQuantile[offset + i] < logval) {
325b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        // logval is the smallest fixed point representation we can have.
326b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        // Values below that will correspond to values in the interval
327b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        // [0, 1], which can't possibly occur.
328b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        inst->noiseEstLogQuantile[offset + i] = logval;
329b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      }
330b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    }
331b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
332b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // update density estimate
333b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
334b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        < WIDTH_Q8) {
335b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
336b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                   inst->noiseEstDensity[offset + i], countProd, 15);
337b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
338b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                   width_factor, countDiv, 15);
339b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
340b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    }
341b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
342b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
343b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    if (counter >= END_STARTUP_LONG) {
344b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      inst->noiseEstCounter[s] = 0;
345b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      if (inst->blockIndex >= END_STARTUP_LONG) {
346b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org        UpdateNoiseEstimateNeon(inst, offset);
347b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      }
348b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    }
349b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    inst->noiseEstCounter[s]++;
350b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
3513b89e10f31160da35b408fd00cb8f89d2b08862dpbos@webrtc.org  }  // end loop over simultaneous estimates
352b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
353b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // Sequentially update the noise during startup
354b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (inst->blockIndex < END_STARTUP_LONG) {
355b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    UpdateNoiseEstimateNeon(inst, offset);
356b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
357b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
358b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  for (i = 0; i < inst->magnLen; i++) {
359b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
360b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
361b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  (*q_noise) = (int16_t)inst->qNoise;
362b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
363b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
// Apply the noise suppression filter to the frequency-domain data held in
// inst->real / inst->imag, then pack the filtered data into |freq_buf| as
// interleaved (real, -imag) pairs.
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
  assert(inst->magnLen % 8 == 1);
  assert(inst->anaLen2 % 16 == 0);

  // (1) Filtering.
  //
  // Scalar reference for this step:
  //   for (i = 0; i < inst->magnLen; i++) {
  //     inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
  //         (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
  //     inst->imag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i],
  //         (int16_t)(inst->noiseSupFilter[i]), 14); // Q(normData-stages)
  //   }

  int16_t* re = inst->real;
  int16_t* im = inst->imag;
  int16_t* gain = (int16_t*)inst->noiseSupFilter;
  int16_t* im_stop = im + inst->magnLen - 4;

  // Eight bins per pass: widen-multiply by the filter gain, drop 14 fractional
  // bits while narrowing back to 16 bits, and write the result in place.
  for (; im < im_stop; re += 8, im += 8, gain += 8) {
    const int16x8_t r = vld1q_s16(re);
    const int16x8_t m = vld1q_s16(im);
    const int16x8_t g = vld1q_s16(gain);
    const int16x4_t g_lo = vget_low_s16(g);
    const int16x4_t g_hi = vget_high_s16(g);

    vst1q_s16(re, vcombine_s16(
        vshrn_n_s32(vmull_s16(vget_low_s16(r), g_lo), 14),
        vshrn_n_s32(vmull_s16(vget_high_s16(r), g_hi), 14)));
    vst1q_s16(im, vcombine_s16(
        vshrn_n_s32(vmull_s16(vget_low_s16(m), g_lo), 14),
        vshrn_n_s32(vmull_s16(vget_high_s16(m), g_hi), 14)));
  }

  // magnLen is one more than a multiple of eight (see the assert above), so a
  // single bin is left over for the scalar path.
  *re = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*re, *gain, 14);
  *im = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*im, *gain, 14);

  // (2) Create spectrum.
  //
  // Scalar reference for this step:
  //   freq_buf[0] = inst->real[0];
  //   freq_buf[1] = -inst->imag[0];
  //   for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
  //     freq_buf[j] = inst->real[i];
  //     freq_buf[j + 1] = -inst->imag[i];
  //   }
  //   freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
  //   freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];

  const int16_t* src_re = inst->real;
  const int16_t* src_im = inst->imag;
  const int16_t* src_im_stop = src_im + inst->anaLen2;
  int16_t* dst = freq_buf;

  // Sixteen bins per pass (two groups of eight); vst2q interleaves val[0] and
  // val[1], producing the (real, -imag) layout.
  for (; src_im < src_im_stop; src_re += 16, src_im += 16, dst += 32) {
    int16x8x2_t first;
    int16x8x2_t second;

    first.val[0] = vld1q_s16(src_re);
    first.val[1] = vnegq_s16(vld1q_s16(src_im));
    second.val[0] = vld1q_s16(src_re + 8);
    second.val[1] = vnegq_s16(vld1q_s16(src_im + 8));

    vst2q_s16(dst, first);
    vst2q_s16(dst + 16, second);
  }

  // The bin at index anaLen2 is not covered by the vector loop.
  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
}
451b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
// Denormalize the input buffer.
//
// Reads every second element of |in| (the even-indexed ones), shifts each by
// (factor - inst->normData) bits -- a left shift when that amount is positive,
// an arithmetic right shift when it is negative -- saturates the result to
// 16 bits and stores it to inst->real[0 .. inst->anaLen - 1].
//
// Scalar equivalent of one output element:
//   tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j], factor - inst->normData);
//   inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32);  // Q0
//
// Each pass writes eight outputs and reads sixteen inputs, so the loop
// overruns unless inst->anaLen is a multiple of eight.
//
// Implemented with intrinsics instead of inline assembly: the previous version
// loaded the shift vector into q10 in one asm statement and relied on it
// surviving until later asm statements, and it accessed memory without
// informing the compiler. Neither is guaranteed to work.
void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
  int16_t* ptr_real = &inst->real[0];
  const int16_t* ptr_in = &in[0];
  const int16_t* const real_end = &inst->real[inst->anaLen];

  // Per-lane shift amount; vshlq_s32 treats a negative count as a right shift.
  const int32x4_t shift = vdupq_n_s32((int32_t)(factor - inst->normData));

  while (ptr_real < real_end) {
    // Loop unrolled once. Both pointers are advanced twice per pass.

    // vld2 de-interleaves eight inputs; val[0] holds the even-indexed ones.
    int16x4x2_t pairs = vld2_s16(ptr_in);
    int32x4_t tmp32 = vshlq_s32(vmovl_s16(pairs.val[0]), shift);
    vst1_s16(ptr_real, vqmovn_s32(tmp32));  // Saturate to 16 bits, Q0.
    ptr_in += 8;
    ptr_real += 4;

    pairs = vld2_s16(ptr_in);
    tmp32 = vshlq_s32(vmovl_s16(pairs.val[0]), shift);
    vst1_s16(ptr_real, vqmovn_s32(tmp32));  // Saturate to 16 bits, Q0.
    ptr_in += 8;
    ptr_real += 4;
  }
}
489b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
// For the noise suppression process: synthesis, read out the fully processed
// segment, and update the synthesis buffer.
//
// Steps, in order:
//   1. Window inst->real, scale it by |gain_factor| and accumulate it (with
//      saturation) into inst->synthesisBuffer, for inst->anaLen samples.
//   2. Copy the first inst->blockLen10ms samples of the synthesis buffer to
//      |out_frame|.
//   3. Move the remaining samples to the front of the synthesis buffer.
//   4. Zero the tail of the synthesis buffer.
//
// Step 1 handles eight samples per pass and steps 2-4 handle sixteen, so the
// loops overrun unless the lengths involved are multiples of those strides.
//
// Implemented with intrinsics instead of inline assembly: the previous version
// kept the gain and zero vectors in NEON registers across separate asm
// statements and accessed memory without informing the compiler. Neither is
// guaranteed to work.
void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
                                   int16_t* out_frame,
                                   int16_t gain_factor) {
  const int16_t* ptr_real = &inst->real[0];
  const int16_t* ptr_window = &inst->window[0];
  int16_t* ptr_syn = &inst->synthesisBuffer[0];
  const int16_t* const syn_end = &inst->synthesisBuffer[inst->anaLen];
  const int16x4_t gain = vdup_n_s16(gain_factor);
  int half;

  // Synthesis. Loop unrolled once: two groups of four samples per pass.
  while (ptr_syn < syn_end) {
    for (half = 0; half < 2; ++half) {
      // tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
      //           inst->window[i], inst->real[i], 14); // Q0, window in Q14
      const int16x4_t tmp16a = vrshrn_n_s32(
          vmull_s16(vld1_s16(ptr_real), vld1_s16(ptr_window)), 14);
      // tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13);
      // tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
      const int16x4_t tmp16b = vqrshrn_n_s32(vmull_s16(gain, tmp16a), 13);
      // inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(
      //     inst->synthesisBuffer[i], tmp16b); // Q0
      vst1_s16(ptr_syn, vqadd_s16(vld1_s16(ptr_syn), tmp16b));

      ptr_real += 4;
      ptr_window += 4;
      ptr_syn += 4;
    }
  }

  // Read out the fully processed segment:
  //   out_frame[i] = inst->synthesisBuffer[i]; // Q0
  const int16_t* src = &inst->synthesisBuffer[0];
  const int16_t* const readout_end = &inst->synthesisBuffer[inst->blockLen10ms];
  int16_t* dst = &out_frame[0];
  while (src < readout_end) {
    // Loop unrolled once: both vectors are loaded before either is stored.
    const int16x8_t lo = vld1q_s16(src);
    const int16x8_t hi = vld1q_s16(src + 8);
    vst1q_s16(dst, lo);
    vst1q_s16(dst + 8, hi);
    src += 16;
    dst += 16;
  }

  // Update synthesis buffer. Scalar equivalent:
  //   WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
  //                         inst->synthesisBuffer + inst->blockLen10ms,
  //                         inst->anaLen - inst->blockLen10ms);
  // Source and destination share one buffer; each pass loads sixteen samples
  // before storing them, copying towards lower addresses.
  src = &inst->synthesisBuffer[inst->blockLen10ms];
  dst = &inst->synthesisBuffer[0];
  while (src < syn_end) {
    const int16x8_t lo = vld1q_s16(src);
    const int16x8_t hi = vld1q_s16(src + 8);
    vst1q_s16(dst, lo);
    vst1q_s16(dst + 8, hi);
    src += 16;
    dst += 16;
  }

  // Clear everything after the samples just moved. Scalar equivalent:
  //   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
  //       + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
  const int16x8_t zeros = vdupq_n_s16(0);
  while (dst < syn_end) {
    vst1q_s16(dst, zeros);
    vst1q_s16(dst + 8, zeros);
    dst += 16;
  }
}
601b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
// Update analysis buffer for lower band, and window data before FFT.
//
// Steps, in order:
//   1. Shift inst->analysisBuffer towards its start by inst->blockLen10ms
//      samples.
//   2. Append |new_speech| so that the buffer is filled up to inst->anaLen.
//   3. Multiply the buffer by inst->window (rounding, 14 fractional bits) and
//      write inst->anaLen samples to |out|.
//
// Steps 1-2 handle sixteen samples per pass and step 3 handles eight, so the
// loops overrun unless the lengths involved are multiples of those strides.
//
// Implemented with intrinsics instead of inline assembly: the previous version
// stored to memory from asm statements without informing the compiler, which
// is not guaranteed to work.
void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
                                  int16_t* out,
                                  int16_t* new_speech) {
  const int16_t* src = &inst->analysisBuffer[inst->blockLen10ms];
  int16_t* dst = &inst->analysisBuffer[0];
  const int16_t* dst_end =
      &inst->analysisBuffer[inst->anaLen - inst->blockLen10ms];
  int half;

  // For lower band update analysis buffer. Scalar equivalent:
  //   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
  //                         inst->analysisBuffer + inst->blockLen10ms,
  //                         inst->anaLen - inst->blockLen10ms);
  while (dst < dst_end) {
    // Loop unrolled once, so both pointers advance by 8 twice.
    vst1q_s16(dst, vld1q_s16(src));
    vst1q_s16(dst + 8, vld1q_s16(src + 8));
    src += 16;
    dst += 16;
  }

  // Append the new samples where the shifted data ended. Scalar equivalent:
  //   WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
  //       + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
  src = new_speech;
  dst_end = &inst->analysisBuffer[inst->anaLen];
  while (dst < dst_end) {
    // Loop unrolled once, so both pointers advance by 8 twice.
    vst1q_s16(dst, vld1q_s16(src));
    vst1q_s16(dst + 8, vld1q_s16(src + 8));
    src += 16;
    dst += 16;
  }

  // Window data before FFT.
  const int16_t* ptr_window = &inst->window[0];
  const int16_t* ptr_ana = &inst->analysisBuffer[0];
  int16_t* ptr_out = &out[0];
  const int16_t* const out_end = &out[inst->anaLen];
  while (ptr_out < out_end) {
    // Loop unrolled once, so all pointers advance by 4 twice.
    for (half = 0; half < 2; ++half) {
      // out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
      //           inst->window[i], inst->analysisBuffer[i], 14); // Q0
      vst1_s16(ptr_out, vrshrn_n_s32(
          vmull_s16(vld1_s16(ptr_ana), vld1_s16(ptr_window)), 14));
      ptr_ana += 4;
      ptr_window += 4;
      ptr_out += 4;
    }
  }
}
677b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
// Create a complex number buffer (out[]) as the input (in[]) interleaved with
// zeros, and normalize it.
//
// For each input sample i in [0, inst->anaLen):
//   out[2 * i]     = in[i] shifted left by inst->normData  // Q(normData)
//   out[2 * i + 1] = 0                                     // imaginary part
//
// |in| is only read; |out| receives two int16_t values per input sample.
//
// NOTE(review): every loop iteration consumes 16 input samples and produces
// 32 output values, so inst->anaLen is assumed to be a multiple of 16 -
// confirm against callers.
//
// Implemented with NEON intrinsics rather than inline assembly. The previous
// version loaded the zero and shift vectors into fixed NEON registers in
// standalone asm statements and relied on those registers surviving until a
// later asm statement, which the compiler does not guarantee. It also
// accessed memory from asm without declaring it.
void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
                                       int16_t* in,
                                       int16_t* out) {
  const int16_t* ptr_in = &in[0];
  const int16_t* const ptr_in_end = &in[inst->anaLen];
  int16_t* ptr_out = &out[0];

  // Same shift count in every lane. Only the low 16 bits of normData are
  // used, matching what "vdup.16" from a core register did.
  const int16x8_t norm_shift = vdupq_n_s16((int16_t)inst->normData);

  // val[0] holds the real parts, val[1] the (always zero) imaginary parts.
  // vst2q_s16 stores them interleaved: re0, im0, re1, im1, ...
  int16x8x2_t complex_pair;
  complex_pair.val[1] = vdupq_n_s16(0);

  while (ptr_in < ptr_in_end) {
    // Loop unrolled once, so ptr_in is advanced by 8 twice and ptr_out is
    // advanced by 16 twice.

    // out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
    // out[j + 1] = 0; // Insert zeros in imaginary part
    complex_pair.val[0] = vshlq_s16(vld1q_s16(ptr_in), norm_shift);
    vst2q_s16(ptr_out, complex_pair);
    ptr_in += 8;
    ptr_out += 16;

    // Same operation on the next 8 samples.
    complex_pair.val[0] = vshlq_s16(vld1q_s16(ptr_in), norm_shift);
    vst2q_s16(ptr_out, complex_pair);
    ptr_in += 8;
    ptr_out += 16;
  }
}
721