/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Contains the function WebRtcIsacfix_AllpassFilter2FixDec16Neon() of the
// iSAC codec, optimized for the ARM NEON platform. It is bit exact with the
// function WebRtcIsacfix_AllpassFilter2FixDec16C() in filterbanks.c. Prototype
// C code is at the end of this file.

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>

// Filters two channels in place through a two-stage all-pass section, using
// one NEON vector to carry both channels and both stages at the same time.
//
// Lane layout of every 4-lane vector below:
//   lane 0: channel 1, stage 1 (factor_ch1[0], filter_state_ch1[0])
//   lane 1: channel 1, stage 2 (factor_ch1[1], filter_state_ch1[1])
//   lane 2: channel 2, stage 1 (factor_ch2[0], filter_state_ch2[0])
//   lane 3: channel 2, stage 2 (factor_ch2[1], filter_state_ch2[1])
//
// Per lane, one filter step is (all accumulations saturate):
//   a     = state + 2 * in * factor
//   out   = (int16_t)(a >> 16)
//   state = (in << 16) - 2 * out * factor
//
// The two stages are software-pipelined: while stage 1 consumes sample k + 1,
// stage 2 consumes the stage-1 output of sample k. vrev32_s16() swaps the two
// 16-bit lanes of each channel, which moves a fresh stage-1 output into the
// stage-2 lane and a freshly loaded sample into the stage-1 lane.
//
// Requirements visible from the accesses below: |length| must be even, each
// data buffer holds |length| samples, and each factor/state array holds two
// elements. On return the data buffers contain the filtered samples and the
// state arrays contain the updated filter states.
void WebRtcIsacfix_AllpassFilter2FixDec16Neon(
    int16_t* data_ch1,  // Input and output in channel 1, in Q0
    int16_t* data_ch2,  // Input and output in channel 2, in Q0
    const int16_t* factor_ch1,  // Scaling factor for channel 1, in Q15
    const int16_t* factor_ch2,  // Scaling factor for channel 2, in Q15
    const int length,  // Length of the data buffers
    int32_t* filter_state_ch1,  // Filter state for channel 1, in Q16
    int32_t* filter_state_ch2) {  // Filter state for channel 2, in Q16
  assert(length % 2 == 0);
  // The pipeline prologue and epilogue below always touch samples 0 and 1 and
  // rewrite both states, so an empty (or negative) length must leave before
  // them: there is nothing to filter and nothing may be modified.
  if (length <= 0) {
    return;
  }

  // Load factor_ch1[0..1] into lanes 0..1 and factor_ch2[0..1] into
  // lanes 2..3.
  int16x4_t factorv = vld1_dup_s16(factor_ch1);
  factorv = vld1_lane_s16(factor_ch1 + 1, factorv, 1);
  factorv = vld1_lane_s16(factor_ch2, factorv, 2);
  factorv = vld1_lane_s16(factor_ch2 + 1, factorv, 3);

  // Load filter_state_ch1[0] and filter_state_ch2[0] into the stage-1 lanes.
  // The stage-2 lanes (1 and 3) hold don't-care values until the stage-2
  // states are loaded further down.
  int32x4_t statev = vld1q_dup_s32(filter_state_ch1);
  statev = vld1q_lane_s32(filter_state_ch2, statev, 2);

  // Loop unrolling preprocessing: run stage 1 alone on sample 0 so that the
  // pipeline is primed.

  // Load data_ch1[0] and data_ch2[0] into the stage-1 lanes.
  int16x4_t datav = vld1_dup_s16(data_ch1);
  datav = vld1_lane_s16(data_ch2, datav, 2);

  int32x4_t a = vqdmlal_s16(statev, datav, factorv);
  int16x4_t tmp1 = vshrn_n_s32(a, 16);

  // Update filter_state_ch1[0] and filter_state_ch2[0].
  statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp1, factorv);

  // Load filter_state_ch1[1] and filter_state_ch2[1] into the stage-2 lanes.
  statev = vld1q_lane_s32(filter_state_ch1 + 1, statev, 1);
  statev = vld1q_lane_s32(filter_state_ch2 + 1, statev, 3);

  // Load data_ch1[1] and data_ch2[1] next to the stage-1 outputs of sample 0,
  // then swap so that each value sits in the lane of the stage consuming it.
  tmp1 = vld1_lane_s16(data_ch1 + 1, tmp1, 1);
  tmp1 = vld1_lane_s16(data_ch2 + 1, tmp1, 3);
  datav = vrev32_s16(tmp1);

  // Loop unrolling processing: two samples per iteration. |n| is read again
  // after the loop, where it equals length - 2.
  int n = 0;
  for (n = 0; n < length - 2; n += 2) {
    a = vqdmlal_s16(statev, datav, factorv);
    tmp1 = vshrn_n_s32(a, 16);
    // Store data_ch1[n] and data_ch2[n] (the stage-2 lanes).
    vst1_lane_s16(data_ch1 + n, tmp1, 1);
    vst1_lane_s16(data_ch2 + n, tmp1, 3);

    // Update filter_state_ch1[0], filter_state_ch1[1]
    // and filter_state_ch2[0], filter_state_ch2[1].
    statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp1, factorv);

    // Load data_ch1[n + 2] and data_ch2[n + 2].
    tmp1 = vld1_lane_s16(data_ch1 + n + 2, tmp1, 1);
    tmp1 = vld1_lane_s16(data_ch2 + n + 2, tmp1, 3);
    datav = vrev32_s16(tmp1);

    a = vqdmlal_s16(statev, datav, factorv);
    int16x4_t tmp2 = vshrn_n_s32(a, 16);
    // Store data_ch1[n + 1] and data_ch2[n + 1].
    vst1_lane_s16(data_ch1 + n + 1, tmp2, 1);
    vst1_lane_s16(data_ch2 + n + 1, tmp2, 3);

    // Update filter_state_ch1[0], filter_state_ch1[1]
    // and filter_state_ch2[0], filter_state_ch2[1].
    statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp2, factorv);

    // Load data_ch1[n + 3] and data_ch2[n + 3].
    tmp2 = vld1_lane_s16(data_ch1 + n + 3, tmp2, 1);
    tmp2 = vld1_lane_s16(data_ch2 + n + 3, tmp2, 3);
    datav = vrev32_s16(tmp2);
  }

  // Loop unrolling post-processing: drain the pipeline for the last two
  // samples.
  a = vqdmlal_s16(statev, datav, factorv);
  tmp1 = vshrn_n_s32(a, 16);
  // Store data_ch1[n] and data_ch2[n].
  vst1_lane_s16(data_ch1 + n, tmp1, 1);
  vst1_lane_s16(data_ch2 + n, tmp1, 3);

  // Update filter_state_ch1[0], filter_state_ch1[1]
  // and filter_state_ch2[0], filter_state_ch2[1].
  statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp1, factorv);
  // Stage 1 has now seen the last sample: store filter_state_ch1[0] and
  // filter_state_ch2[0].
  vst1q_lane_s32(filter_state_ch1, statev, 0);
  vst1q_lane_s32(filter_state_ch2, statev, 2);

  // Only the stage-2 lanes carry meaningful data in this final step.
  datav = vrev32_s16(tmp1);
  a = vqdmlal_s16(statev, datav, factorv);
  const int16x4_t tmp2 = vshrn_n_s32(a, 16);
  // Store data_ch1[n + 1] and data_ch2[n + 1].
  vst1_lane_s16(data_ch1 + n + 1, tmp2, 1);
  vst1_lane_s16(data_ch2 + n + 1, tmp2, 3);

  // Update filter_state_ch1[1] and filter_state_ch2[1].
  statev = vqdmlsl_s16(vshll_n_s16(datav, 16), tmp2, factorv);
  // Store filter_state_ch1[1] and filter_state_ch2[1].
  vst1q_lane_s32(filter_state_ch1 + 1, statev, 1);
  vst1q_lane_s32(filter_state_ch2 + 1, statev, 3);
}

// This function is the prototype for the above NEON-optimized function.
//void AllpassFilter2FixDec16BothChannels(
//    int16_t *data_ch1,  // Input and output in channel 1, in Q0
//    int16_t *data_ch2,  // Input and output in channel 2, in Q0
//    const int16_t *factor_ch1,  // Scaling factor for channel 1, in Q15
//    const int16_t *factor_ch2,  // Scaling factor for channel 2, in Q15
//    const int length,  // Length of the data buffers
//    int32_t *filter_state_ch1,  // Filter state for channel 1, in Q16
//    int32_t *filter_state_ch2) {  // Filter state for channel 2, in Q16
//  int n = 0;
//  int32_t state0_ch1 = filter_state_ch1[0], state1_ch1 = filter_state_ch1[1];
//  int32_t state0_ch2 = filter_state_ch2[0], state1_ch2 = filter_state_ch2[1];
//  int16_t sample0_ch1 = 0, sample0_ch2 = 0;
//  int16_t sample1_ch1 = 0, sample1_ch2  = 0;
//  int32_t a0_ch1 = 0, a0_ch2 = 0;
//  int32_t b0_ch1 = 0, b0_ch2 = 0;
//
//  int32_t a1_ch1 = 0, a1_ch2 = 0;
//  int32_t b1_ch1 = 0, b1_ch2 = 0;
//  int32_t b2_ch1  = 0, b2_ch2 = 0;
//
//  // Loop unrolling preprocessing.
//
//  sample0_ch1 = data_ch1[n];
//  sample0_ch2 = data_ch2[n];
//
//  a0_ch1 = (factor_ch1[0] * sample0_ch1) << 1;
//  a0_ch2 = (factor_ch2[0] * sample0_ch2) << 1;
//
//  b0_ch1 = WebRtcSpl_AddSatW32(a0_ch1, state0_ch1);
//  b0_ch2 = WebRtcSpl_AddSatW32(a0_ch2, state0_ch2); //Q16+Q16=Q16
//
//  a0_ch1 = -factor_ch1[0] * (int16_t)(b0_ch1 >> 16);
//  a0_ch2 = -factor_ch2[0] * (int16_t)(b0_ch2 >> 16);
//
//  state0_ch1 = WebRtcSpl_AddSatW32(a0_ch1 <<1, (uint32_t)sample0_ch1 << 16);
//  state0_ch2 = WebRtcSpl_AddSatW32(a0_ch2 <<1, (uint32_t)sample0_ch2 << 16);
//
//  sample1_ch1 = data_ch1[n + 1];
//  sample0_ch1 = (int16_t) (b0_ch1 >> 16); //Save as Q0
//  sample1_ch2  = data_ch2[n + 1];
//  sample0_ch2 = (int16_t) (b0_ch2 >> 16); //Save as Q0
//
//
//  for (n = 0; n < length - 2; n += 2) {
//    a1_ch1 = (factor_ch1[0] * sample1_ch1) << 1;
//    a0_ch1 = (factor_ch1[1] * sample0_ch1) << 1;
//    a1_ch2 = (factor_ch2[0] * sample1_ch2) << 1;
//    a0_ch2 = (factor_ch2[1] * sample0_ch2) << 1;
//
//    b1_ch1 = WebRtcSpl_AddSatW32(a1_ch1, state0_ch1);
//    b0_ch1 = WebRtcSpl_AddSatW32(a0_ch1, state1_ch1); //Q16+Q16=Q16
//    b1_ch2 = WebRtcSpl_AddSatW32(a1_ch2, state0_ch2); //Q16+Q16=Q16
//    b0_ch2 = WebRtcSpl_AddSatW32(a0_ch2, state1_ch2); //Q16+Q16=Q16
//
//    a1_ch1 = -factor_ch1[0] * (int16_t)(b1_ch1 >> 16);
//    a0_ch1 = -factor_ch1[1] * (int16_t)(b0_ch1 >> 16);
//    a1_ch2 = -factor_ch2[0] * (int16_t)(b1_ch2 >> 16);
//    a0_ch2 = -factor_ch2[1] * (int16_t)(b0_ch2 >> 16);
//
//    state0_ch1 = WebRtcSpl_AddSatW32(a1_ch1<<1, (uint32_t)sample1_ch1 <<16);
//    state1_ch1 = WebRtcSpl_AddSatW32(a0_ch1<<1, (uint32_t)sample0_ch1 <<16);
//    state0_ch2 = WebRtcSpl_AddSatW32(a1_ch2<<1, (uint32_t)sample1_ch2 <<16);
//    state1_ch2 = WebRtcSpl_AddSatW32(a0_ch2<<1, (uint32_t)sample0_ch2 <<16);
//
//    sample0_ch1 = data_ch1[n + 2];
//    sample1_ch1 = (int16_t) (b1_ch1 >> 16); //Save as Q0
//    sample0_ch2 = data_ch2[n + 2];
//    sample1_ch2  = (int16_t) (b1_ch2 >> 16); //Save as Q0
//
//    a0_ch1 = (factor_ch1[0] * sample0_ch1) << 1;
//    a1_ch1 = (factor_ch1[1] * sample1_ch1) << 1;
//    a0_ch2 = (factor_ch2[0] * sample0_ch2) << 1;
//    a1_ch2 = (factor_ch2[1] * sample1_ch2) << 1;
//
//    b2_ch1 = WebRtcSpl_AddSatW32(a0_ch1, state0_ch1);
//    b1_ch1 = WebRtcSpl_AddSatW32(a1_ch1, state1_ch1); //Q16+Q16=Q16
//    b2_ch2 = WebRtcSpl_AddSatW32(a0_ch2, state0_ch2); //Q16+Q16=Q16
//    b1_ch2 = WebRtcSpl_AddSatW32(a1_ch2, state1_ch2); //Q16+Q16=Q16
//
//    a0_ch1 = -factor_ch1[0] * (int16_t)(b2_ch1 >> 16);
//    a1_ch1 = -factor_ch1[1] * (int16_t)(b1_ch1 >> 16);
//    a0_ch2 = -factor_ch2[0] * (int16_t)(b2_ch2 >> 16);
//    a1_ch2 = -factor_ch2[1] * (int16_t)(b1_ch2 >> 16);
//
//    state0_ch1 = WebRtcSpl_AddSatW32(a0_ch1<<1, (uint32_t)sample0_ch1<<16);
//    state1_ch1 = WebRtcSpl_AddSatW32(a1_ch1<<1, (uint32_t)sample1_ch1<<16);
//    state0_ch2 = WebRtcSpl_AddSatW32(a0_ch2<<1, (uint32_t)sample0_ch2<<16);
//    state1_ch2 = WebRtcSpl_AddSatW32(a1_ch2<<1, (uint32_t)sample1_ch2<<16);
//
//
//    sample1_ch1 = data_ch1[n + 3];
//    sample0_ch1 = (int16_t) (b2_ch1  >> 16); //Save as Q0
//    sample1_ch2 = data_ch2[n + 3];
//    sample0_ch2 = (int16_t) (b2_ch2 >> 16); //Save as Q0
//
//    data_ch1[n]     = (int16_t) (b0_ch1 >> 16); //Save as Q0
//    data_ch1[n + 1] = (int16_t) (b1_ch1 >> 16); //Save as Q0
//    data_ch2[n]     = (int16_t) (b0_ch2 >> 16);
//    data_ch2[n + 1] = (int16_t) (b1_ch2 >> 16);
//  }
//
//  // Loop unrolling post-processing.
//
//  a1_ch1 = (factor_ch1[0] * sample1_ch1) << 1;
//  a0_ch1 = (factor_ch1[1] * sample0_ch1) << 1;
//  a1_ch2 = (factor_ch2[0] * sample1_ch2) << 1;
//  a0_ch2 = (factor_ch2[1] * sample0_ch2) << 1;
//
//  b1_ch1 = WebRtcSpl_AddSatW32(a1_ch1, state0_ch1);
//  b0_ch1 = WebRtcSpl_AddSatW32(a0_ch1, state1_ch1);
//  b1_ch2 = WebRtcSpl_AddSatW32(a1_ch2, state0_ch2);
//  b0_ch2 = WebRtcSpl_AddSatW32(a0_ch2, state1_ch2);
//
//  a1_ch1 = -factor_ch1[0] * (int16_t)(b1_ch1 >> 16);
//  a0_ch1 = -factor_ch1[1] * (int16_t)(b0_ch1 >> 16);
//  a1_ch2 = -factor_ch2[0] * (int16_t)(b1_ch2 >> 16);
//  a0_ch2 = -factor_ch2[1] * (int16_t)(b0_ch2 >> 16);
//
//  state0_ch1 = WebRtcSpl_AddSatW32(a1_ch1<<1, (uint32_t)sample1_ch1 << 16);
//  state1_ch1 = WebRtcSpl_AddSatW32(a0_ch1<<1, (uint32_t)sample0_ch1 << 16);
//  state0_ch2 = WebRtcSpl_AddSatW32(a1_ch2<<1, (uint32_t)sample1_ch2 << 16);
//  state1_ch2 = WebRtcSpl_AddSatW32(a0_ch2<<1, (uint32_t)sample0_ch2 << 16);
//
//  data_ch1[n] = (int16_t) (b0_ch1 >> 16); //Save as Q0
//  data_ch2[n] = (int16_t) (b0_ch2 >> 16);
//
//  sample1_ch1 = (int16_t) (b1_ch1 >> 16); //Save as Q0
//  sample1_ch2  = (int16_t) (b1_ch2 >> 16); //Save as Q0
//
//  a1_ch1 = (factor_ch1[1] * sample1_ch1) << 1;
//  a1_ch2 = (factor_ch2[1] * sample1_ch2) << 1;
//
//  b1_ch1 = WebRtcSpl_AddSatW32(a1_ch1, state1_ch1); //Q16+Q16=Q16
//  b1_ch2 = WebRtcSpl_AddSatW32(a1_ch2, state1_ch2); //Q16+Q16=Q16
//
//  a1_ch1 = -factor_ch1[1] * (int16_t)(b1_ch1 >> 16);
//  a1_ch2 = -factor_ch2[1] * (int16_t)(b1_ch2 >> 16);
//
//  state1_ch1 = WebRtcSpl_AddSatW32(a1_ch1<<1, (uint32_t)sample1_ch1<<16);
//  state1_ch2 = WebRtcSpl_AddSatW32(a1_ch2<<1, (uint32_t)sample1_ch2<<16);
//
//  data_ch1[n + 1] = (int16_t) (b1_ch1 >> 16); //Save as Q0
//  data_ch2[n + 1] = (int16_t) (b1_ch2 >> 16);
//
//  filter_state_ch1[0] = state0_ch1;
//  filter_state_ch1[1] = state1_ch1;
//  filter_state_ch2[0] = state0_ch2;
//  filter_state_ch2[1] = state1_ch2;
//}
277