1// REQUIRES: aarch64-registered-target
2// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
3// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
4
// Test the AArch64-specific NEON scalar and by-lane (lane/laneq) intrinsics and their types.
6
7#include <arm_neon.h>
8
9
10float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
11  // CHECK-LABEL: test_vmuls_lane_f32
12  return vmuls_lane_f32(a, b, 1);
13  // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
14}
15
16float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
17  // CHECK-LABEL: test_vmuld_lane_f64
18  return vmuld_lane_f64(a, b, 0);
19  // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
20}
21
22float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
23  // CHECK-LABEL: test_vmuls_laneq_f32
24  return vmuls_laneq_f32(a, b, 3);
25  // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
26}
27
28float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
29  // CHECK-LABEL: test_vmuld_laneq_f64
30  return vmuld_laneq_f64(a, b, 1);
31  // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
32}
33
34float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
35  // CHECK-LABEL: test_vmul_n_f64
36  return vmul_n_f64(a, b);
37  // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
38}
39
40float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
41// CHECK-LABEL: test_vmulxs_lane_f32
42  return vmulxs_lane_f32(a, b, 1);
43// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
44}
45
46float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
47// CHECK-LABEL: test_vmulxs_laneq_f32
48  return vmulxs_laneq_f32(a, b, 3);
49// CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
50}
51
52float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
53// CHECK-LABEL: test_vmulxd_lane_f64
54  return vmulxd_lane_f64(a, b, 0);
55// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
56}
57
58float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
59// CHECK-LABEL: test_vmulxd_laneq_f64
60  return vmulxd_laneq_f64(a, b, 1);
61// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
62}
63
64// CHECK-LABEL: test_vmulx_lane_f64
65float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
66  return vmulx_lane_f64(a, b, 0);
67  // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
68}
69
70
71// CHECK-LABEL: test_vmulx_laneq_f64_0
72float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
73  return vmulx_laneq_f64(a, b, 0);
74  // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
75}
76
77// CHECK-LABEL: test_vmulx_laneq_f64_1
78float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
79  return vmulx_laneq_f64(a, b, 1);
80  // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
81}
82
83
84// CHECK-LABEL: test_vfmas_lane_f32
85float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
86  return vfmas_lane_f32(a, b, c, 1);
87  // CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
88}
89
90// CHECK-LABEL: test_vfmad_lane_f64
91float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
92  return vfmad_lane_f64(a, b, c, 0);
93  // CHECK: {{fmla|fmadd}} {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
94}
95
96// CHECK-LABEL: test_vfmad_laneq_f64
97float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
98  return vfmad_laneq_f64(a, b, c, 1);
99  // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
100}
101
102// CHECK-LABEL: test_vfmss_lane_f32
103float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
104  return vfmss_lane_f32(a, b, c, 1);
105  // CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
106}
107
108// CHECK-LABEL: test_vfma_lane_f64
109float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
110  return vfma_lane_f64(a, b, v, 0);
111  // CHECK: {{fmla|fmadd}} {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
112}
113
114// CHECK-LABEL: test_vfms_lane_f64
115float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
116  return vfms_lane_f64(a, b, v, 0);
117  // CHECK: {{fmls|fmsub}} {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+.d\[0\]|d[0-9]+}}
118}
119
120// CHECK-LABEL: test_vfma_laneq_f64
121float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
122  return vfma_laneq_f64(a, b, v, 0);
123  // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
124}
125
126// CHECK-LABEL: test_vfms_laneq_f64
127float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
128  return vfms_laneq_f64(a, b, v, 0);
129  // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
130}
131
132// CHECK-LABEL: test_vqdmullh_lane_s16
133int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
134  return vqdmullh_lane_s16(a, b, 3);
135  // CHECK: sqdmull {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9].4h}}, {{v[0-9]+}}.h[3]
136}
137
138// CHECK-LABEL: test_vqdmulls_lane_s32
139int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
140  return vqdmulls_lane_s32(a, b, 1);
141  // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
142}
143
144// CHECK-LABEL: test_vqdmullh_laneq_s16
145int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
146  return vqdmullh_laneq_s16(a, b, 7);
147  // CHECK: sqdmull {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
148}
149
150// CHECK-LABEL: test_vqdmulls_laneq_s32
151int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
152  return vqdmulls_laneq_s32(a, b, 3);
153  // CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
154}
155
156// CHECK-LABEL: test_vqdmulhh_lane_s16
157int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
158  return vqdmulhh_lane_s16(a, b, 3);
159// CHECK: sqdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
160}
161
162// CHECK-LABEL: test_vqdmulhs_lane_s32
163int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
164  return vqdmulhs_lane_s32(a, b, 1);
165// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
166}
167
168
169// CHECK-LABEL: test_vqdmulhh_laneq_s16
170int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
171  return vqdmulhh_laneq_s16(a, b, 7);
172// CHECK: sqdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
173}
174
175
176// CHECK-LABEL: test_vqdmulhs_laneq_s32
177int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
178  return vqdmulhs_laneq_s32(a, b, 3);
179// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
180}
181
182// CHECK-LABEL: test_vqrdmulhh_lane_s16
183int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
184  return vqrdmulhh_lane_s16(a, b, 3);
185// CHECK: sqrdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
186}
187
188// CHECK-LABEL: test_vqrdmulhs_lane_s32
189int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
190  return vqrdmulhs_lane_s32(a, b, 1);
191// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
192}
193
194
195// CHECK-LABEL: test_vqrdmulhh_laneq_s16
196int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
197  return vqrdmulhh_laneq_s16(a, b, 7);
198// CHECK: sqrdmulh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
199}
200
201
202// CHECK-LABEL: test_vqrdmulhs_laneq_s32
203int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
204  return vqrdmulhs_laneq_s32(a, b, 3);
205// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
206}
207
208// CHECK-LABEL: test_vqdmlalh_lane_s16
209int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
210  return vqdmlalh_lane_s16(a, b, c, 3);
211// CHECK: sqdmlal {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
212}
213
214// CHECK-LABEL: test_vqdmlals_lane_s32
215int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
216  return vqdmlals_lane_s32(a, b, c, 1);
217// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
218}
219
220// CHECK-LABEL: test_vqdmlalh_laneq_s16
221int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
222  return vqdmlalh_laneq_s16(a, b, c, 7);
223// CHECK: sqdmlal {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
224}
225
226// CHECK-LABEL: test_vqdmlals_laneq_s32
227int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
228  return vqdmlals_laneq_s32(a, b, c, 3);
229// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
230}
231
232// CHECK-LABEL: test_vqdmlslh_lane_s16
233int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
234  return vqdmlslh_lane_s16(a, b, c, 3);
235// CHECK: sqdmlsl {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
236}
237
238// CHECK-LABEL: test_vqdmlsls_lane_s32
239int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
240  return vqdmlsls_lane_s32(a, b, c, 1);
241// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
242}
243
244// CHECK-LABEL: test_vqdmlslh_laneq_s16
245int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
246  return vqdmlslh_laneq_s16(a, b, c, 7);
247// CHECK: sqdmlsl {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
248}
249
250// CHECK-LABEL: test_vqdmlsls_laneq_s32
251int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
252  return vqdmlsls_laneq_s32(a, b, c, 3);
253// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
254}
255
256// CHECK-LABEL: test_vmulx_lane_f64_0:
257float64x1_t test_vmulx_lane_f64_0() {
258      float64x1_t arg1;
259      float64x1_t arg2;
260      float64x1_t result;
261      float64_t sarg1, sarg2, sres;
262      arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2));
263      arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3));
264      result = vmulx_lane_f64(arg1, arg2, 0);
265// CHECK: adrp x[[ADDRLO:[0-9]+]]
266// CHECK: ldr d0, [x[[ADDRLO]],
267// CHECK: adrp x[[ADDRLO:[0-9]+]]
268// CHECK: ldr d1, [x[[ADDRLO]],
269// CHECK: fmulx d0, d1, d0
270      return result;
271}
272
273// CHECK-LABEL: test_vmulx_laneq_f64_2:
274float64x1_t test_vmulx_laneq_f64_2() {
275      float64x1_t arg1;
276      float64x1_t arg2;
277      float64x2_t arg3;
278      float64x1_t result;
279      float64_t sarg1, sarg2, sres;
280      arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2));
281      arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3));
282      arg3 = vcombine_f64(arg1, arg2);
283      result = vmulx_laneq_f64(arg1, arg3, 1);
284// CHECK: adrp x[[ADDRLO:[0-9]+]]
285// CHECK: ldr d0, [x[[ADDRLO]],
286// CHECK: adrp x[[ADDRLO:[0-9]+]]
287// CHECK: ldr d1, [x[[ADDRLO]],
288// CHECK: fmulx d0, d1, d0
289      return result;
290}
291