/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 */
11da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
12da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com#include "dl/api/omxtypes.h"
13da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
14da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.comvoid x86SP_FFT_CToC_FC32_Fwd_Radix4_ms(
15da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in,
16da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out,
17da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *twiddle,
18da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_INT n,
19da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_INT sub_size,
20da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_INT sub_num) {
21da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT set;
22da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT grp;
23da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT step = sub_num >> 1;
24da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT set_count = sub_num >> 2;
25da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT n_by_4 = n >> 2;
26da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_INT n_mul_2 = n << 1;
27da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  OMX_F32 *out0 = out;
28da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
29da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  // grp == 0
30da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  for (set = 0; set < set_count; ++set) {
31da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_FC32 t0;
32da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_FC32 t1;
33da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_FC32 t2;
34da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_FC32 t3;
35da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
36da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in0 = in + set;
37da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in1 = in0 + set_count;
38da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in2 = in1 + set_count;
39da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *in3 = in2 + set_count;
40da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out1 = out0 + n_by_4;
41da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out2 = out1 + n_by_4;
42da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    OMX_F32 *out3 = out2 + n_by_4;
43da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
44da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CADD t0, in0, in2
45da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t0.Re = in0[0] + in2[0];
46da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t0.Im = in0[n] + in2[n];
47da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
48da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CSUB t1, in0, in2
49da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t1.Re = in0[0] - in2[0];
50da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t1.Im = in0[n] - in2[n];
51da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
52da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CADD t2, in1, in3
53da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t2.Re = in1[0] + in3[0];
54da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t2.Im = in1[n] + in3[n];
55da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
56da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CSUB t3, in1, in3
57da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t3.Re = in1[0] - in3[0];
58da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    t3.Im = in1[n] - in3[n];
59da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
60da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CADD out0, t0, t2
61da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out0[0] = t0.Re + t2.Re;
62da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out0[n] = t0.Im + t2.Im;
63da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
64da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CSUB out2, t0, t2
65da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out2[0] = t0.Re - t2.Re;
66da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out2[n] = t0.Im - t2.Im;
67da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
68da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CSUB_ADD_X out3, t1, t3
69da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out3[0] = t1.Re - t3.Im;
70da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out3[n] = t1.Im + t3.Re;
71da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
72da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    // CADD_SUB_X out1, t1, t3
73da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out1[0] = t1.Re + t3.Im;
74da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out1[n] = t1.Im - t3.Re;
75da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
76da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    out0 += 1;
77da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  }
78da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
79da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  // grp > 0
80da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  for (grp = 1; grp < sub_size; ++grp) {
81da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *tw1 = twiddle + grp * step;
82da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *tw2 = tw1 + grp * step;
83da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    const OMX_F32 *tw3 = tw2 + grp * step;
84da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
85da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    for (set = 0; set < set_count; ++set) {
86da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 t0;
87da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 t1;
88da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 t2;
89da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 t3;
90da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 tt1;
91da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 tt2;
92da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_FC32 tt3;
93da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
94da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      const OMX_F32 *in0 = in + set + grp * sub_num;
95da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      const OMX_F32 *in1 = in0 + set_count;
96da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      const OMX_F32 *in2 = in1 + set_count;
97da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      const OMX_F32 *in3 = in2 + set_count;
98da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_F32 *out1 = out0 + n_by_4;
99da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_F32 *out2 = out1 + n_by_4;
100da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      OMX_F32 *out3 = out2 + n_by_4;
101da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
102da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CMUL tt1, Tw1, in1
103da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      tt1.Re = tw1[0] * in1[0] - tw1[n_mul_2] * in1[n];
104da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      tt1.Im = tw1[0] * in1[n] + tw1[n_mul_2] * in1[0];
105da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
106da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CMUL tt2, Tw2, in2
107da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      tt2.Re = tw2[0] * in2[0] - tw2[n_mul_2] * in2[n];
108da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      tt2.Im = tw2[0] * in2[n] + tw2[n_mul_2] * in2[0];
109da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
110da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CMUL tt3, Tw3, in3
111da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      tt3.Re = tw3[0] * in3[0] - tw3[n_mul_2] * in3[n];
112da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      tt3.Im = tw3[0] * in3[n] + tw3[n_mul_2] * in3[0];
113da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
114da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CADD t0, in0, tt2
115da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t0.Re = in0[0] + tt2.Re;
116da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t0.Im = in0[n] + tt2.Im;
117da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
118da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CSUB t1, in0, tt2
119da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t1.Re = in0[0] - tt2.Re;
120da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t1.Im = in0[n] - tt2.Im;
121da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
122da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CADD t2, tt1, tt3
123da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t2.Re = tt1.Re + tt3.Re;
124da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t2.Im = tt1.Im + tt3.Im;
125da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
126da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CSUB t3, tt1, tt3
127da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t3.Re = tt1.Re - tt3.Re;
128da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      t3.Im = tt1.Im - tt3.Im;
129da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
130da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CADD out0, t0, t2
131da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out0[0] = t0.Re + t2.Re;
132da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out0[n] = t0.Im + t2.Im;
133da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
134da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CSUB out2, t0, t2
135da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out2[0] = t0.Re - t2.Re;
136da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out2[n] = t0.Im - t2.Im;
137da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
138da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CADD_SUB_X out1, t1, t3
139da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out1[0] = t1.Re + t3.Im;
140da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out1[n] = t1.Im - t3.Re;
141da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
142da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      // CSUB_ADD_X out3, t1, t3
143da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out3[0] = t1.Re - t3.Im;
144da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out3[n] = t1.Im + t3.Re;
145da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com
146da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com      out0 += 1;
147da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com    }
148da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com  }
149da04d4f8ef493ab7bf1fbdaffe206899f03681c2rtoy@google.com}
150