1// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
2
3// Test new aarch64 intrinsics and types: floating-point multiply-accumulate (vmla/vmls/vfma/vfms) in their _n, _lane and _laneq forms
4
5#include <arm_neon.h>
6
// vmla_n_f32 on <2 x float>: the expected IR splats the scalar c into both
// lanes with insertelement, multiplies b by that splat (fmul) and adds the
// product to a (fadd).
7// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
8// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
9// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
10// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
11// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
12// CHECK:   ret <2 x float> [[ADD_I]]
13float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
14  return vmla_n_f32(a, b, c);
15}
16
// vmlaq_n_f32 on <4 x float>: the expected IR splats the scalar c into all
// four lanes with insertelement, multiplies b by that splat (fmul) and adds
// the product to a (fadd).
17// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
18// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
19// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
20// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
21// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
22// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
23// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
24// CHECK:   ret <4 x float> [[ADD_I]]
25float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
26  return vmlaq_n_f32(a, b, c);
27}
28
// vmlaq_n_f64 on <2 x double>: the expected IR splats the scalar c into both
// lanes with insertelement, multiplies b by that splat (fmul) and adds the
// product to a (fadd).
29// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
30// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
31// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
32// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
33// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
34// CHECK:   ret <2 x double> [[ADD_I]]
35float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
36  return vmlaq_n_f64(a, b, c);
37}
38
// vmlsq_n_f32 on <4 x float>: the expected IR splats the scalar c into all
// four lanes with insertelement, multiplies b by that splat (fmul) and
// subtracts the product from a (fsub).
39// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
40// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
41// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
42// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
43// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
44// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
45// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
46// CHECK:   ret <4 x float> [[SUB_I]]
47float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
48  return vmlsq_n_f32(a, b, c);
49}
50
// vmls_n_f32 on <2 x float>: the expected IR splats the scalar c into both
// lanes with insertelement, multiplies b by that splat (fmul) and subtracts
// the product from a (fsub).
51// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
52// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
53// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
54// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
55// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
56// CHECK:   ret <2 x float> [[SUB_I]]
57float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
58  return vmls_n_f32(a, b, c);
59}
60
// vmlsq_n_f64 on <2 x double>: the expected IR splats the scalar c into both
// lanes with insertelement, multiplies b by that splat (fmul) and subtracts
// the product from a (fsub).
61// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
62// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
63// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
64// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
65// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
66// CHECK:   ret <2 x double> [[SUB_I]]
67float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
68  return vmlsq_n_f64(a, b, c);
69}
70
// vmla_lane_f32 with lane index 0: the expected IR broadcasts lane 0 of the
// 2-element v with a zeroinitializer shuffle mask, multiplies b by the
// broadcast (fmul) and adds the product to a (fadd).
71// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
72// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
73// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
74// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
75// CHECK:   ret <2 x float> [[ADD]]
76float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
77  return vmla_lane_f32(a, b, v, 0);
78}
79
// vmlaq_lane_f32 with lane index 0: the expected IR widens lane 0 of the
// 2-element v into a <4 x float> broadcast (zeroinitializer shuffle mask),
// multiplies b by it (fmul) and adds the product to a (fadd).
80// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
81// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
82// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
83// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
84// CHECK:   ret <4 x float> [[ADD]]
85float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
86  return vmlaq_lane_f32(a, b, v, 0);
87}
88
// vmla_laneq_f32 with lane index 0: the expected IR narrows lane 0 of the
// 4-element v into a <2 x float> broadcast (zeroinitializer shuffle mask),
// multiplies b by it (fmul) and adds the product to a (fadd).
89// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
90// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
91// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
92// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
93// CHECK:   ret <2 x float> [[ADD]]
94float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
95  return vmla_laneq_f32(a, b, v, 0);
96}
97
// vmlaq_laneq_f32 with lane index 0: the expected IR broadcasts lane 0 of the
// 4-element v with a zeroinitializer shuffle mask, multiplies b by the
// broadcast (fmul) and adds the product to a (fadd).
98// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
99// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
100// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
101// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
102// CHECK:   ret <4 x float> [[ADD]]
103float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
104  return vmlaq_laneq_f32(a, b, v, 0);
105}
106
// vmls_lane_f32 with lane index 0: the expected IR broadcasts lane 0 of the
// 2-element v with a zeroinitializer shuffle mask, multiplies b by the
// broadcast (fmul) and subtracts the product from a (fsub).
107// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
108// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer
109// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
110// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
111// CHECK:   ret <2 x float> [[SUB]]
112float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
113  return vmls_lane_f32(a, b, v, 0);
114}
115
// vmlsq_lane_f32 with lane index 0: the expected IR widens lane 0 of the
// 2-element v into a <4 x float> broadcast (zeroinitializer shuffle mask),
// multiplies b by it (fmul) and subtracts the product from a (fsub).
116// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
117// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
118// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
119// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
120// CHECK:   ret <4 x float> [[SUB]]
121float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
122  return vmlsq_lane_f32(a, b, v, 0);
123}
124
// vmls_laneq_f32 with lane index 0: the expected IR narrows lane 0 of the
// 4-element v into a <2 x float> broadcast (zeroinitializer shuffle mask),
// multiplies b by it (fmul) and subtracts the product from a (fsub).
125// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
126// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
127// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
128// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
129// CHECK:   ret <2 x float> [[SUB]]
130float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
131  return vmls_laneq_f32(a, b, v, 0);
132}
133
// vmlsq_laneq_f32 with lane index 0: the expected IR broadcasts lane 0 of the
// 4-element v with a zeroinitializer shuffle mask, multiplies b by the
// broadcast (fmul) and subtracts the product from a (fsub).
134// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
135// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
136// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
137// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
138// CHECK:   ret <4 x float> [[SUB]]
139float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) {
140  return vmlsq_laneq_f32(a, b, v, 0);
141}
142
// vmla_lane_f32 with lane index 1: the expected IR broadcasts lane 1 of the
// 2-element v (shuffle mask <1, 1>), multiplies b by the broadcast (fmul) and
// adds the product to a (fadd).
143// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
144// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
145// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
146// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
147// CHECK:   ret <2 x float> [[ADD]]
148float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
149  return vmla_lane_f32(a, b, v, 1);
150}
151
// vmlaq_lane_f32 with lane index 1: the expected IR widens lane 1 of the
// 2-element v into a <4 x float> broadcast (shuffle mask <1, 1, 1, 1>),
// multiplies b by it (fmul) and adds the product to a (fadd).
152// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
153// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
154// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
155// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
156// CHECK:   ret <4 x float> [[ADD]]
157float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
158  return vmlaq_lane_f32(a, b, v, 1);
159}
160
// vmla_laneq_f32 with lane index 3: the expected IR narrows lane 3 of the
// 4-element v into a <2 x float> broadcast (shuffle mask <3, 3>), multiplies
// b by it (fmul) and adds the product to a (fadd).
161// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
162// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
163// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
164// CHECK:   [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
165// CHECK:   ret <2 x float> [[ADD]]
166float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
167  return vmla_laneq_f32(a, b, v, 3);
168}
169
// vmlaq_laneq_f32 with lane index 3: the expected IR broadcasts lane 3 of the
// 4-element v (shuffle mask <3, 3, 3, 3>), multiplies b by the broadcast
// (fmul) and adds the product to a (fadd).
170// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
171// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
172// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
173// CHECK:   [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
174// CHECK:   ret <4 x float> [[ADD]]
175float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
176  return vmlaq_laneq_f32(a, b, v, 3);
177}
178
// vmls_lane_f32 with lane index 1: the expected IR broadcasts lane 1 of the
// 2-element v (shuffle mask <1, 1>), multiplies b by the broadcast (fmul) and
// subtracts the product from a (fsub).
179// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
180// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1>
181// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
182// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
183// CHECK:   ret <2 x float> [[SUB]]
184float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
185  return vmls_lane_f32(a, b, v, 1);
186}
187
// vmlsq_lane_f32 with lane index 1: the expected IR widens lane 1 of the
// 2-element v into a <4 x float> broadcast (shuffle mask <1, 1, 1, 1>),
// multiplies b by it (fmul) and subtracts the product from a (fsub).
188// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
189// CHECK:   [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
190// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
191// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
192// CHECK:   ret <4 x float> [[SUB]]
193float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
194  return vmlsq_lane_f32(a, b, v, 1);
195}
// vmls_laneq_f32 with lane index 3: the expected IR narrows lane 3 of the
// 4-element v into a <2 x float> broadcast (shuffle mask <3, 3>), multiplies
// b by it (fmul) and subtracts the product from a (fsub).
196// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
197// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
198// CHECK:   [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
199// CHECK:   [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
200// CHECK:   ret <2 x float> [[SUB]]
201float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
202  return vmls_laneq_f32(a, b, v, 3);
203}
204
// vmlsq_laneq_f32 with lane index 3: the expected IR broadcasts lane 3 of the
// 4-element v (shuffle mask <3, 3, 3, 3>), multiplies b by the broadcast
// (fmul) and subtracts the product from a (fsub).
205// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
206// CHECK:   [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
207// CHECK:   [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
208// CHECK:   [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
209// CHECK:   ret <4 x float> [[SUB]]
210float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
211  return vmlsq_laneq_f32(a, b, v, 3);
212}
213
// vfmaq_n_f64 on <2 x double>: the expected IR splats the scalar c into both
// lanes, round-trips a, b and the splat through <16 x i8> bitcasts, and then
// calls llvm.fma.v2f64 with operands (b, splat(c), a), i.e. the accumulator a
// is passed as the last (addend) operand.
214// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
215// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
216// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
217// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
218// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
219// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
220// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
221// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
222// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
223// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
224// CHECK:   ret <2 x double> [[TMP6]]
225float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
226  return vfmaq_n_f64(a, b, c);
227}
228
// vfmsq_n_f64 on <2 x double>: the expected IR first negates b (fsub from a
// vector of -0.0), splats the scalar c into both lanes, round-trips a, the
// negated b and the splat through <16 x i8> bitcasts, and then calls
// llvm.fma.v2f64 with operands (-b, splat(c), a).
229// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
230// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
231// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
232// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
233// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
234// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
235// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
236// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
237// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
238// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
239// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
240// CHECK:   ret <2 x double> [[TMP6]]
241float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
242  return vfmsq_n_f64(a, b, c);
243}
244