19257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org/* 29257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 39257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * 49257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * Use of this source code is governed by a BSD-style license 59257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * that can be found in the LICENSE file in the root of the source 69257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * tree. An additional intellectual property rights grant can be found 79257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * in the file PATENTS. All contributing project authors may 89257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * be found in the AUTHORS file in the root of the source tree. 99257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org */ 109257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 119257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org/* 129257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * The core AEC algorithm, neon version of speed-critical functions. 139257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * 149257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org * Based on aec_core_sse2.c. 
159257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org */ 169257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 179257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org#include <arm_neon.h> 189257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org#include <math.h> 19685eb96adec550115e494008b9cec03154ba442dbjornv@webrtc.org#include <string.h> // memset 209257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 2131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 2231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org#include "webrtc/modules/audio_processing/aec/aec_common.h" 239257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" 249257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org#include "webrtc/modules/audio_processing/aec/aec_rdft.h" 259257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 269257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.orgenum { kShiftExponentIntoTopMantissa = 8 }; 279257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.orgenum { kFloatExponentShift = 23 }; 289257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 29685eb96adec550115e494008b9cec03154ba442dbjornv@webrtc.org__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 30685eb96adec550115e494008b9cec03154ba442dbjornv@webrtc.org return aRe * bRe - aIm * bIm; 31685eb96adec550115e494008b9cec03154ba442dbjornv@webrtc.org} 32685eb96adec550115e494008b9cec03154ba442dbjornv@webrtc.org 33ccbe08ee89f2997fa987ea29b0e097b57501721ebjornv@webrtc.org__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { 34ccbe08ee89f2997fa987ea29b0e097b57501721ebjornv@webrtc.org return aRe * bIm + aIm * bRe; 35ccbe08ee89f2997fa987ea29b0e097b57501721ebjornv@webrtc.org} 36ccbe08ee89f2997fa987ea29b0e097b57501721ebjornv@webrtc.org 
// Accumulates the filtered far-end spectrum into |yf|.
//
// For every filter partition i, the complex product
//   xfBuf[partition i, read circularly from xfBufBlockPos] * wfBuf[partition i]
// is added bin by bin to yf ([0] = real parts, [1] = imaginary parts).
// |yf| is only accumulated into, never cleared here.
static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  const int num_partitions = aec->num_partitions;
  for (i = 0; i < num_partitions; i++) {
    int j;
    // Offset of this partition's block inside xfBuf (circular over
    // num_partitions blocks) and inside wfBuf (linear).
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }

    // vectorized code (four at once)
    for (j = 0; j + 3 < PART_LEN1; j += 4) {
      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
      const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
      const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
      const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
      const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
      // e = re(x * w) = xRe * wRe - xIm * wIm
      const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
      const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
      // f = im(x * w) = xRe * wIm + xIm * wRe
      const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
      const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
      const float32x4_t g = vaddq_f32(yf_re, e);
      const float32x4_t h = vaddq_f32(yf_im, f);
      vst1q_f32(&yf[0][j], g);
      vst1q_f32(&yf[1][j], h);
    }
    // scalar code for the remaining items.
    for (; j < PART_LEN1; j++) {
      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
                        aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j],
                        aec->wfBuf[1][pos + j]);
      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
                        aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j],
                        aec->wfBuf[1][pos + j]);
    }
  }
}

// AArch64's <arm_neon.h> already provides vdivq_f32() and vsqrtq_f32() as
// native intrinsics, so redefining them there is a compile error. Only supply
// the Newton-Raphson based fallbacks when building for 32-bit ARM.
#if !defined(__aarch64__)
// Approximates the lane-wise quotient a / b as a * (1 / b), where 1 / b is
// the VRECPE estimate refined by two Newton-Raphson steps.
static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
  int i;
  float32x4_t x = vrecpeq_f32(b);
  // from arm documentation
  // The Newton-Raphson iteration:
  //     x[n+1] = x[n] * (2 - d * x[n])
  // converges to (1/d) if x0 is the result of VRECPE applied to d.
  //
  // Note: The precision did not improve after 2 iterations.
  for (i = 0; i < 2; i++) {
    x = vmulq_f32(vrecpsq_f32(b, x), x);
  }
  // a/b = a*(1/b)
  return vmulq_f32(a, x);
}

// Approximates the lane-wise square root of s as s * (1 / sqrt(s)), where
// 1 / sqrt(s) is the VRSQRTE estimate refined by two Newton-Raphson steps.
// Lanes whose estimate is +infinity (s == 0) are forced to return 0.
static float32x4_t vsqrtq_f32(float32x4_t s) {
  int i;
  float32x4_t x = vrsqrteq_f32(s);

  // Code to handle sqrt(0).
  // If the input to sqrtf() is zero, a zero will be returned.
  // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
  const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
  // check for divide by zero
  const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
  // zero out the positive infinity results, so that the final s * x gives 0
  // instead of 0 * inf.
  x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
                                      vreinterpretq_u32_f32(x)));
  // from arm documentation
  // The Newton-Raphson iteration:
  //     x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2
  // converges to (1/sqrt(d)) if x0 is the result of VRSQRTE applied to d.
  //
  // Note: The precision did not improve after 2 iterations.
  for (i = 0; i < 2; i++) {
    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
  }
  // sqrt(s) = s * 1/sqrt(s)
  return vmulq_f32(s, x);
}
#endif  // !defined(__aarch64__)

// Normalizes and limits the error spectrum |ef| in place
// ([0] = real parts, [1] = imaginary parts). For every bin:
//   1. divide by (xPow + 1e-10),
//   2. if the magnitude exceeds the error threshold, rescale the bin by
//      threshold / (magnitude + 1e-10),
//   3. multiply by the step size mu.
// mu and the threshold come from the extended-filter constants when
// aec->extended_filter_enabled is set, otherwise from the aec->normal_* fields.
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
  const float error_threshold = aec->extended_filter_enabled ?
      kExtendedErrorThreshold : aec->normal_error_threshold;
  const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
  const float32x4_t kMu = vmovq_n_f32(mu);
  const float32x4_t kThresh = vmovq_n_f32(error_threshold);
  int i;
  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
    const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
    const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
    const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
    float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
    float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
    const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
    const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
    const float32x4_t absEf = vsqrtq_f32(ef_sum2);
    // Per-lane mask: all ones where the magnitude exceeds the threshold.
    const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
    const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
    const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
    // Limited values, used only in the lanes selected by |bigger|.
    uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
    uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
    // Unlimited values, kept only in the lanes not selected by |bigger|.
    uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
                                     vreinterpretq_u32_f32(ef_re));
    uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
                                     vreinterpretq_u32_f32(ef_im));
    ef_re_if = vandq_u32(bigger, ef_re_if);
    ef_im_if = vandq_u32(bigger, ef_im_if);
    // Merge the two disjoint lane sets.
    ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
    ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
    // Stepsize factor
    ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
    ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
    vst1q_f32(&ef[0][i], ef_re);
    vst1q_f32(&ef[1][i], ef_im);
  }
  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    float abs_ef;
    ef[0][i] /= (aec->xPow[i] + 1e-10f);
    ef[1][i] /= (aec->xPow[i] + 1e-10f);
    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);

    if (abs_ef > error_threshold) {
      abs_ef = error_threshold / (abs_ef + 1e-10f);
      ef[0][i] *= abs_ef;
      ef[1][i] *= abs_ef;
    }

    // Stepsize factor
    ef[0][i] *= mu;
    ef[1][i] *= mu;
  }
}

// Updates the filter coefficients aec->wfBuf from the error spectrum |ef|.
//
// For every partition: writes conjugate(xfBuf) * ef into |fft| (interleaved
// re/im), runs aec_rdft_inverse_128(), zeroes the second half of |fft|,
// scales the first half by 2 / PART_LEN2, runs aec_rdft_forward_128() and
// adds the result to that partition of wfBuf. |fft| is scratch space and is
// overwritten; the vector stores below write up to index 2 * PART_LEN - 1.
static void FilterAdaptationNEON(AecCore* aec,
                                 float* fft,
                                 float ef[2][PART_LEN1]) {
  int i;
  const int num_partitions = aec->num_partitions;
  for (i = 0; i < num_partitions; i++) {
    // Offset of this partition's block inside xfBuf (circular over
    // num_partitions blocks) and inside wfBuf (linear).
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    int j;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }

    // Process the whole array...
    for (j = 0; j < PART_LEN; j += 4) {
      // Load xfBuf and ef.
      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
      // Calculate the product of conjugate(xfBuf) by ef.
      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
      //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
      // Interleave real and imaginary parts.
      const float32x4x2_t g_n_h = vzipq_f32(e, f);
      // Store
      vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
      vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
    }
    // ... and fixup the first imaginary entry: fft[1] receives the real part
    // of conjugate(xfBuf) * ef for bin PART_LEN.
    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
                   -aec->xfBuf[1][xPos + PART_LEN],
                   ef[0][PART_LEN],
                   ef[1][PART_LEN]);

    aec_rdft_inverse_128(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      const float scale = 2.0f / PART_LEN2;
      const float32x4_t scale_ps = vmovq_n_f32(scale);
      for (j = 0; j < PART_LEN; j += 4) {
        const float32x4_t fft_ps = vld1q_f32(&fft[j]);
        const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
        vst1q_f32(&fft[j], fft_scale);
      }
    }
    aec_rdft_forward_128(fft);

    {
      // The vector loop below adds fft[1] to wfBuf[1][pos]; fft[1] is instead
      // added to the real part of bin PART_LEN here, and the original
      // wfBuf[1][pos] is saved now and restored after the loop.
      const float wt1 = aec->wfBuf[1][pos];
      aec->wfBuf[0][pos + PART_LEN] += fft[1];
      for (j = 0; j < PART_LEN; j += 4) {
        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
        const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
        const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
        // De-interleave: val[0] = real parts, val[1] = imaginary parts.
        const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
        wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
        wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);

        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
      }
      aec->wfBuf[1][pos] = wt1;
    }
  }
}

// Approximates the lane-wise power a^b as exp2(b * log2(a)).
// The sign bit of |a| is ignored: only its exponent and mantissa bits are read.
static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
  // a^b = exp2(b * log2(a))
  //   exp2(x) and log2(x) are calculated using polynomial approximations.
  float32x4_t log2_a, b_log2_a, a_exp_b;

  // Calculate log2(x), x = a.
  {
    // To calculate log2(x), we decompose x like this:
    //   x = y * 2^n
    //     n is an integer
    //     y is in the [1.0, 2.0) range
    //
    //   log2(x) = log2(y) + n
    //     n       can be evaluated by playing with float representation.
    //     log2(y) in a small range can be approximated, this code uses an order
    //             five polynomial approximation. The coefficients have been
    //             estimated with the Remez algorithm and the resulting
    //             polynomial has a maximum relative error of 0.00086%.

    // Compute n.
    //    This is done by masking the exponent, shifting it into the top bit of
    //    the mantissa, putting eight into the biased exponent (to shift/
    //    compensate the fact that the exponent has been shifted in the top/
    //    fractional part and finally getting rid of the implicit leading one
    //    from the mantissa by subtracting it out.
    //    In numbers: 0x43800000 is 256.0f, so n_0 reads as 256 + E (E being
    //    the biased exponent of a), and 0x43BF8000 is 383.0f = 256 + 127,
    //    which leaves n = E - 127.
    const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
    const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
    const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
    const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
                                       vec_float_exponent_mask);
    const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
    const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
    const float32x4_t n =
        vsubq_f32(vreinterpretq_f32_u32(n_0),
                  vreinterpretq_f32_u32(vec_implicit_leading_one));
    // Compute y.
    const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
    const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
    const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a),
                                          vec_mantissa_mask);
    const float32x4_t y =
        vreinterpretq_f32_u32(vorrq_u32(mantissa,
                                        vec_zero_biased_exponent_is_one));
    // Approximate log2(y) ~= (y - 1) * pol5(y).
    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
    const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
    const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
    const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
    const float32x4_t C2 = vdupq_n_f32(2.5988452f);
    const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
    const float32x4_t C0 = vdupq_n_f32(3.1157899f);
    // Horner evaluation: each step computes Ck + y * (previous result).
    float32x4_t pol5_y = C5;
    pol5_y = vmlaq_f32(C4, y, pol5_y);
    pol5_y = vmlaq_f32(C3, y, pol5_y);
    pol5_y = vmlaq_f32(C2, y, pol5_y);
    pol5_y = vmlaq_f32(C1, y, pol5_y);
    pol5_y = vmlaq_f32(C0, y, pol5_y);
    const float32x4_t y_minus_one =
        vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
    const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);

    // Combine parts.
    log2_a = vaddq_f32(n, log2_y);
  }

  // b * log2(a)
  b_log2_a = vmulq_f32(b, log2_a);

  // Calculate exp2(x), x = b * log2(a).
  {
    // To calculate 2^x, we decompose x like this:
    //   x = n + y
    //     n is an integer, the value of x - 0.5 rounded down, therefore
    //     y is in the [0.5, 1.5) range
    //
    //   2^x = 2^n * 2^y
    //     2^n can be evaluated by playing with float representation.
    //     2^y in a small range can be approximated, this code uses an order two
    //         polynomial approximation. The coefficients have been estimated
    //         with the Remez algorithm and the resulting polynomial has a
    //         maximum relative error of 0.17%.
    //
    // NOTE(review): vcvtq_s32_f32() below converts by truncating toward zero,
    // which equals "rounded down" only when x - 0.5 >= 0. For x - 0.5 < 0 the
    // integer part is rounded up instead, so y is not in the range stated
    // above. Confirm the resulting approximation error is acceptable.

    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
    const float32x4_t max_input = vdupq_n_f32(129.f);
    const float32x4_t min_input = vdupq_n_f32(-126.99999f);
    const float32x4_t x_min = vminq_f32(b_log2_a, max_input);
    const float32x4_t x_max = vmaxq_f32(x_min, min_input);
    // Compute n.
    const float32x4_t half = vdupq_n_f32(0.5f);
    const float32x4_t x_minus_half = vsubq_f32(x_max, half);
    const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);

    // Compute 2^n.
    const int32x4_t float_exponent_bias = vdupq_n_s32(127);
    const int32x4_t two_n_exponent =
        vaddq_s32(x_minus_half_floor, float_exponent_bias);
    const float32x4_t two_n =
        vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
    // Compute y.
    const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));

    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
    const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
    const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
    const float32x4_t C0 = vdupq_n_f32(1.0017247f);
    float32x4_t exp2_y = C2;
    exp2_y = vmlaq_f32(C1, y, exp2_y);
    exp2_y = vmlaq_f32(C0, y, exp2_y);

    // Combine parts.
    a_exp_b = vmulq_f32(exp2_y, two_n);
  }

  return a_exp_b;
}

static void OverdriveAndSuppressNEON(AecCore* aec,
                                     float hNl[PART_LEN1],
                                     const float hNlFb,
                                     float efw[2][PART_LEN1]) {
  int i;
  const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
  const float32x4_t vec_one = vdupq_n_f32(1.0f);
  const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
  const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);

  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    // Weight subbands
    float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
    const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
    const uint32x4_t bigger = vcgtq_f32(vec_hNl,
vec_hNlFb); 3849257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve, 3859257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_hNlFb); 3869257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); 3879257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve, 3889257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_hNl); 3899257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger), 3909257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vreinterpretq_u32_f32(vec_hNl)); 3919257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const float32x4_t vec_one_weightCurve_add = 3929257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); 3939257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const uint32x4_t vec_if1 = 3949257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); 3959257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 3969257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); 3979257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 3989257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org { 3999257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const float32x4_t vec_overDriveCurve = 4009257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vld1q_f32(&WebRtcAec_overDriveCurve[i]); 4019257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org const float32x4_t vec_overDriveSm_overDriveCurve = 4029257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vmulq_f32(vec_overDriveSm, vec_overDriveCurve); 
4039257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); 4049257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vst1q_f32(&hNl[i], vec_hNl); 4059257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org } 4069257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 4079257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // Suppress error signal 4089257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org { 4099257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); 4109257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); 4119257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); 4129257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); 4139257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 4149257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // Ooura fft returns incorrect sign on imaginary component. It matters 4159257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // here because we are making an additive change with comfort noise. 4169257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); 4179257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vst1q_f32(&efw[0][i], vec_efw_re); 4189257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org vst1q_f32(&efw[1][i], vec_efw_im); 4199257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org } 4209257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org } 4219257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 4229257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // scalar code for the remaining items. 
4239257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org for (; i < PART_LEN1; i++) { 4249257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // Weight subbands 4259257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org if (hNl[i] > hNlFb) { 4269257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + 4279257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org (1 - WebRtcAec_weightCurve[i]) * hNl[i]; 4289257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org } 4299257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 4309257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); 4319257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 4329257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // Suppress error signal 4339257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org efw[0][i] *= hNl[i]; 4349257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org efw[1][i] *= hNl[i]; 4359257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 4369257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // Ooura fft returns incorrect sign on imaginary component. It matters 4379257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org // here because we are making an additive change with comfort noise. 4389257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org efw[1][i] *= -1; 4399257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org } 4409257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org} 4419257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 44231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.orgstatic int PartitionDelay(const AecCore* aec) { 44331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Measures the energy in each filter partition and returns the partition with 44431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // highest energy. 
44531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // TODO(bjornv): Spread computational cost by computing one partition per 44631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // block? 44731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float wfEnMax = 0; 44831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int i; 44931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int delay = 0; 45031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 45131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (i = 0; i < aec->num_partitions; i++) { 45231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int j; 45331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int pos = i * PART_LEN1; 45431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float wfEn = 0; 45531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_wfEn = vdupq_n_f32(0.0f); 45631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // vectorized code (four at once) 45731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (j = 0; j + 3 < PART_LEN1; j += 4) { 45831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]); 45931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]); 46031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); 46131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); 46231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 46331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org { 46431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x2_t vec_total; 46531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A B C D 46631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_total = 
vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); 46731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A+B C+D 46831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_total = vpadd_f32(vec_total, vec_total); 46931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A+B+C+D A+B+C+D 47031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org wfEn = vget_lane_f32(vec_total, 0); 47131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 47231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 47331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // scalar code for the remaining items. 47431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (; j < PART_LEN1; j++) { 47531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + 47631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; 47731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 47831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 47931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org if (wfEn > wfEnMax) { 48031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org wfEnMax = wfEn; 48131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org delay = i; 48231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 48331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 48431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org return delay; 48531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org} 48631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 48731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// Updates the following smoothed Power Spectral Densities (PSD): 48831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// - sd : near-end 48931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// - se : residual echo 49031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// - sx : 
far-end 49131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// - sde : cross-PSD of near-end and residual echo 49231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// - sxd : cross-PSD of near-end and far-end 49331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// 49431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// In addition to updating the PSDs, also the filter diverge state is determined 49531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// upon actions are taken. 49631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.orgstatic void SmoothedPSD(AecCore* aec, 49731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float efw[2][PART_LEN1], 49831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float dfw[2][PART_LEN1], 49931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float xfw[2][PART_LEN1]) { 50031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Power estimate smoothing coefficients. 50131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float* ptrGCoh = aec->extended_filter_enabled 50231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ? 
WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] 50331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; 50431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int i; 50531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float sdSum = 0, seSum = 0; 50631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD); 50731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_sdSum = vdupq_n_f32(0.0f); 50831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_seSum = vdupq_n_f32(0.0f); 50931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 51031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (i = 0; i + 3 < PART_LEN1; i += 4) { 51131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]); 51231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]); 51331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]); 51431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]); 51531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]); 51631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]); 51731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]); 51831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]); 51931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]); 52031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_dfw_sumsq = 
vmulq_f32(vec_dfw0, vec_dfw0); 52131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0); 52231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0); 52331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 52431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1); 52531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1); 52631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1); 52731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15); 52831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]); 52931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]); 53031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]); 53131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 53231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&aec->sd[i], vec_sd); 53331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&aec->se[i], vec_se); 53431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&aec->sx[i], vec_sx); 53531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 53631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org { 53731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); 53831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0); 53931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1); 
54031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]); 54131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]); 54231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); 54331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); 54431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); 54531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); 54631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst2q_f32(&aec->sde[i][0], vec_sde); 54731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 54831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 54931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org { 55031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); 55131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); 55231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); 55331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); 55431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); 55531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); 55631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); 55731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], 
vec_dfwxfw0011, ptrGCoh[1]); 55831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); 55931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst2q_f32(&aec->sxd[i][0], vec_sxd); 56031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 56131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 56231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); 56331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_seSum = vaddq_f32(vec_seSum, vec_se); 56431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 56531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org { 56631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x2_t vec_sdSum_total; 56731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x2_t vec_seSum_total; 56831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A B C D 56931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum), 57031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vget_high_f32(vec_sdSum)); 57131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum), 57231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vget_high_f32(vec_seSum)); 57331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A+B C+D 57431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); 57531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); 57631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A+B+C+D A+B+C+D 57731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org sdSum = vget_lane_f32(vec_sdSum_total, 0); 57831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org seSum = vget_lane_f32(vec_seSum_total, 0); 
57931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 58031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 58131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // scalar code for the remaining items. 58231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (; i < PART_LEN1; i++) { 58331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->sd[i] = ptrGCoh[0] * aec->sd[i] + 58431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); 58531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->se[i] = ptrGCoh[0] * aec->se[i] + 58631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); 58731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // We threshold here to protect against the ill-effects of a zero farend. 58831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // The threshold is not arbitrarily chosen, but balances protection and 58931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // adverse interaction with the algorithm's tuning. 59031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // TODO(bjornv): investigate further why this is so sensitive. 
59131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->sx[i] = 59231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[0] * aec->sx[i] + 59331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * WEBRTC_SPL_MAX( 59431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 59531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org WebRtcAec_kMinFarendPSD); 59631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 59731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->sde[i][0] = 59831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[0] * aec->sde[i][0] + 59931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); 60031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->sde[i][1] = 60131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[0] * aec->sde[i][1] + 60231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); 60331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 60431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->sxd[i][0] = 60531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[0] * aec->sxd[i][0] + 60631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); 60731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->sxd[i][1] = 60831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[0] * aec->sxd[i][1] + 60931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); 61031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 61131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org sdSum += aec->sd[i]; 61231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org seSum += aec->se[i]; 
61331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 61431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 61531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Divergent filter safeguard. 61631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; 61731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 61831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org if (aec->divergeState) 61931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); 62031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 62131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Reset if error is significantly larger than nearend (13 dB). 62231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) 62331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); 62431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org} 62531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 62631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// Window time domain data to be used by the fft. 
62731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org__inline static void WindowData(float* x_windowed, const float* x) { 62831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int i; 62931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (i = 0; i < PART_LEN; i += 4) { 63031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); 63131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); 63231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); 63331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // A B C D 63431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_sqrtHanning_rev = 63531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); 63631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // B A D C 63731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); 63831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // D C B A 63931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), 64031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vget_low_f32(vec_sqrtHanning_rev)); 64131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); 64231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&x_windowed[PART_LEN + i], 64331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); 64431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 64531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org} 64631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 
64731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org// Puts fft output data into a complex valued array. 64831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org__inline static void StoreAsComplex(const float* data, 64931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float data_complex[2][PART_LEN1]) { 65031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int i; 65131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (i = 0; i < PART_LEN; i += 4) { 65231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); 65331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&data_complex[0][i], vec_data.val[0]); 65431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&data_complex[1][i], vec_data.val[1]); 65531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 65631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // fix beginning/end values 65731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org data_complex[1][0] = 0; 65831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org data_complex[1][PART_LEN] = 0; 65931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org data_complex[0][0] = data[0]; 66031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org data_complex[0][PART_LEN] = data[1]; 66131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org} 66231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 66331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.orgstatic void SubbandCoherenceNEON(AecCore* aec, 66431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float efw[2][PART_LEN1], 66531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float xfw[2][PART_LEN1], 66631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float* fft, 66731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float* cohde, 66831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float* cohxd) { 
66931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float dfw[2][PART_LEN1]; 67031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org int i; 67131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 67231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org if (aec->delayEstCtr == 0) 67331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->delayIdx = PartitionDelay(aec); 67431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 67531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Use delayed far. 67631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org memcpy(xfw, 67731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec->xfwBuf + aec->delayIdx * PART_LEN1, 67831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org sizeof(xfw[0][0]) * 2 * PART_LEN1); 67931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 68031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Windowed near fft 68131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org WindowData(fft, aec->dBuf); 68231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec_rdft_forward_128(fft); 68331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org StoreAsComplex(fft, dfw); 68431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 68531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Windowed error fft 68631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org WindowData(fft, aec->eBuf); 68731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org aec_rdft_forward_128(fft); 68831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org StoreAsComplex(fft, efw); 68931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 69031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org SmoothedPSD(aec, efw, dfw, xfw); 69131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 69231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org { 69331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t 
vec_1eminus10 = vdupq_n_f32(1e-10f); 69431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 69531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // Subband coherence 69631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (i = 0; i + 3 < PART_LEN1; i += 4) { 69731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); 69831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_se = vld1q_f32(&aec->se[i]); 69931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); 70031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); 70131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); 70231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); 70331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); 70431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); 70531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); 70631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); 70731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_cohde = vdivq_f32(vec_cohde, vec_sdse); 70831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); 70931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx); 71031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 71131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&cohde[i], vec_cohde); 
71231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org vst1q_f32(&cohxd[i], vec_cohxd); 71331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 71431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 71531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org // scalar code for the remaining items. 71631ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org for (; i < PART_LEN1; i++) { 71731ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org cohde[i] = 71831ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / 71931ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org (aec->sd[i] * aec->se[i] + 1e-10f); 72031ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org cohxd[i] = 72131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / 72231ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org (aec->sx[i] * aec->sd[i] + 1e-10f); 72331ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org } 72431ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org} 72531ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org 7269257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.orgvoid WebRtcAec_InitAec_neon(void) { 727ccbe08ee89f2997fa987ea29b0e097b57501721ebjornv@webrtc.org WebRtcAec_FilterFar = FilterFarNEON; 72826f68fe36316141f949c723088540e055b95fb98bjornv@webrtc.org WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; 729685eb96adec550115e494008b9cec03154ba442dbjornv@webrtc.org WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 7309257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; 73131ab61cf47b1a086aea0952054e09dd47856bf87bjornv@webrtc.org WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; 7329257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org} 7339257c64588ec1b396299ca1036267f1b38f969febjornv@webrtc.org 734