1// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2// RUN:     -fallow-half-arguments-and-returns -ffp-contract=fast -S -emit-llvm -o - %s \
3// RUN: | opt -S -mem2reg \
4// RUN: | FileCheck %s
5
6// Test new aarch64 intrinsics and types
7
8#include <arm_neon.h>
9
// ===== vadd / vaddq: element-wise vector addition =====
// Each test wraps a single NEON intrinsic; the CHECK lines pin the exact
// LLVM IR (a plain `add` for integer lanes, `fadd` for FP lanes) emitted
// after -mem2reg.  Do not rename functions or parameters: the FileCheck
// patterns reference them literally (%v1, %v2).
// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// Unsigned variants lower to the same `add` IR as the signed ones;
// signedness is irrelevant for two's-complement vector addition.
// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// 128-bit (quad-register) variants of the same tests.
// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t  v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// float64x2_t only exists in AArch64 NEON (no 64-bit-FP D-register form).
// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}
142
// ===== vsub / vsubq: element-wise vector subtraction =====
// Mirrors the vadd tests above: each intrinsic must lower to a plain
// `sub` (integer) or `fsub` (floating-point) on the matching vector type.
// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}
// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}
// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// Unsigned variants share the same `sub` lowering as the signed ones.
// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// 128-bit (quad-register) variants.
// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t  v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}
273
// ===== vmul / vmulq: element-wise vector multiplication =====
// Integer variants lower to `mul`, FP variants to `fmul`.  The polynomial
// (p8) variants at the end are the exception: carry-less multiplication
// has no plain IR instruction, so they must lower to the
// llvm.aarch64.neon.pmul.* intrinsic instead.
// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}


// Unsigned variants share the same `mul` lowering as the signed ones.
// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK:   ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// 128-bit (quad-register) variants.
// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK:   ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK:   ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// Polynomial multiply: must stay an aarch64.neon.pmul intrinsic call.
// The in-body comments below are leftovers from the pre-FileCheck-update
// style of this test and are not checked by FileCheck.
// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  //  test_vmul_p8
  return vmul_p8(v1, v2);
  //  pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  // test_vmulq_p8
  return vmulq_p8(v1, v2);
  // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
397
398
// ===== vmla / vmlaq: multiply-accumulate (v1 + v2 * v3) =====
// Each intrinsic must lower to a separate `mul`/`fmul` followed by
// `add`/`fadd` — no fused IR is expected here even with
// -ffp-contract=fast (contraction happens later in the backend).
// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// NOTE(review): the return type here is int8x8_t, not int16x4_t, so the
// <4 x i16> result is bitcast to <8 x i8> on return — the CHECK lines
// below deliberately pin that bitcast.  Presumably a historical quirk of
// this test kept for coverage of the implicit vector reinterpret.
// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// 128-bit (quad-register) variants.
// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}
519
// ===== vmls / vmlsq: multiply-subtract (v1 - v2 * v3) =====
// Mirrors the vmla tests: a `mul`/`fmul` followed by `sub`/`fsub`.
// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// NOTE(review): as with test_vmla_s16 above, the int8x8_t return type
// forces a <4 x i16> -> <8 x i8> bitcast that the CHECK lines pin.
// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}
// 128-bit (quad-register) variants.
// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}
// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}
638// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
639// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
640// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
641// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
642// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
643// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
644// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
645// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
646// CHECK:   ret <2 x float> [[TMP6]]
647float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
648  return vfma_f32(v1, v2, v3);
649}
650
651// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
652// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
653// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
654// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
655// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
656// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
657// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
658// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
659// CHECK:   ret <4 x float> [[TMP6]]
660float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
661  return vfmaq_f32(v1, v2, v3);
662}
663
664// CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
665// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
666// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
667// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
668// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
669// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
670// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
671// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
672// CHECK:   ret <2 x double> [[TMP6]]
673float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
674  return vfmaq_f64(v1, v2, v3);
675}
676// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
677// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
678// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
679// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
680// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
681// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
682// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
683// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
684// CHECK:   [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
685// CHECK:   ret <2 x float> [[TMP6]]
686float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
687  return vfms_f32(v1, v2, v3);
688}
689
690// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
691// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
692// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
693// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
694// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
695// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
696// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
697// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
698// CHECK:   [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
699// CHECK:   ret <4 x float> [[TMP6]]
700float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
701  return vfmsq_f32(v1, v2, v3);
702}
703
704// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
705// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
706// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
707// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
708// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
709// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
710// CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
711// CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
712// CHECK:   [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
713// CHECK:   ret <2 x double> [[TMP6]]
// vfmsq_f64: same pattern as the f32 variant — fsub from -0.0 then
// @llvm.fma.v2f64, per the CHECK lines above.
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}
717
718// CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
719// CHECK:   [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
720// CHECK:   ret <2 x double> [[DIV_I]]
// vdivq_f64: must lower to a plain IR fdiv (no intrinsic call).
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}
724
725// CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
726// CHECK:   [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
727// CHECK:   ret <4 x float> [[DIV_I]]
// vdivq_f32: must lower to a plain IR fdiv (no intrinsic call).
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}
731
732// CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 {
733// CHECK:   [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
734// CHECK:   ret <2 x float> [[DIV_I]]
// vdiv_f32 (64-bit vector): must lower to a plain IR fdiv.
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}
738
739// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
740// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
741// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
742// CHECK:   ret <8 x i8> [[ADD_I]]
// vaba_s8: absolute-difference-and-accumulate — llvm.aarch64.neon.sabd.v8i8
// followed by an integer add of v1, per the CHECK lines above.
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}
746
747// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
748// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
749// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
750// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
751// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
752// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
753// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
754// CHECK:   ret <4 x i16> [[ADD_I]]
// vaba_s16: sabd.v4i16 + add; the round-trip <4 x i16> -> <8 x i8> bitcasts
// come from the generic intrinsic lowering and are pinned by the CHECKs.
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}
758
759// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
760// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
761// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
762// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
763// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
764// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
765// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
766// CHECK:   ret <2 x i32> [[ADD_I]]
// vaba_s32: sabd.v2i32 + add (with the usual i8-vector bitcast round-trip).
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}
770
771// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
772// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
773// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
774// CHECK:   ret <8 x i8> [[ADD_I]]
// vaba_u8: unsigned variant — uabd.v8i8 + add.
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}
778
779// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
780// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
781// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
782// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
783// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
784// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
785// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
786// CHECK:   ret <4 x i16> [[ADD_I]]
// vaba_u16: uabd.v4i16 + add.
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}
790
791// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
792// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
793// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
794// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
795// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
796// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
797// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
798// CHECK:   ret <2 x i32> [[ADD_I]]
// vaba_u32: uabd.v2i32 + add.
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}
802
803// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
804// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
805// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
806// CHECK:   ret <16 x i8> [[ADD_I]]
// vabaq_s8: 128-bit variant — sabd.v16i8 + add.
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}
810
811// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
812// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
813// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
814// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
815// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
816// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
817// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
818// CHECK:   ret <8 x i16> [[ADD_I]]
// vabaq_s16: sabd.v8i16 + add.
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}
822
823// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
824// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
825// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
826// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
827// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
828// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
829// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
830// CHECK:   ret <4 x i32> [[ADD_I]]
// vabaq_s32: sabd.v4i32 + add.
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}
834
835// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
836// CHECK:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
837// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
838// CHECK:   ret <16 x i8> [[ADD_I]]
// vabaq_u8: uabd.v16i8 + add.
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}
842
843// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
844// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
845// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
846// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
847// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
848// CHECK:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
849// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
850// CHECK:   ret <8 x i16> [[ADD_I]]
// vabaq_u16: uabd.v8i16 + add.
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}
854
855// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
856// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
857// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
858// CHECK:   [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
859// CHECK:   [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
860// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
861// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
862// CHECK:   ret <4 x i32> [[ADD_I]]
// vabaq_u32: uabd.v4i32 + add.
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}
866
867// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
868// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
869// CHECK:   ret <8 x i8> [[VABD_I]]
// vabd_s8: signed absolute difference — a bare sabd.v8i8 call, no add.
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}
873
874// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
875// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
876// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
877// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
878// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
879// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
880// CHECK:   ret <4 x i16> [[VABD2_I]]
// vabd_s16: sabd.v4i16 with the standard i8-vector bitcast round-trip.
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}
884
885// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
886// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
887// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
888// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
889// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
890// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
891// CHECK:   ret <2 x i32> [[VABD2_I]]
// vabd_s32: sabd.v2i32.
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}
895
896// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
897// CHECK:   [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
898// CHECK:   ret <8 x i8> [[VABD_I]]
// vabd_u8: uabd.v8i8.
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}
902
903// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
904// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
905// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
906// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
907// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
908// CHECK:   [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
909// CHECK:   ret <4 x i16> [[VABD2_I]]
// vabd_u16: uabd.v4i16.
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}
913
914// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
915// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
916// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
917// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
918// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
919// CHECK:   [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
920// CHECK:   ret <2 x i32> [[VABD2_I]]
// vabd_u32: uabd.v2i32.
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}
924
925// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
926// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
927// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
928// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
929// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
930// CHECK:   [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
931// CHECK:   ret <2 x float> [[VABD2_I]]
// vabd_f32: floating-point absolute difference — fabd.v2f32.
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}
935
936// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
937// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
938// CHECK:   ret <16 x i8> [[VABD_I]]
// vabdq_s8: 128-bit variant — sabd.v16i8.
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}
942
943// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
944// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
945// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
946// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
947// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
948// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
949// CHECK:   ret <8 x i16> [[VABD2_I]]
// vabdq_s16: sabd.v8i16.
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}
953
954// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
955// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
956// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
957// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
958// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
959// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
960// CHECK:   ret <4 x i32> [[VABD2_I]]
// vabdq_s32: sabd.v4i32.
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}
964
965// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
966// CHECK:   [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
967// CHECK:   ret <16 x i8> [[VABD_I]]
// vabdq_u8: uabd.v16i8.
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}
971
972// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
973// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
974// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
975// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
976// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
977// CHECK:   [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
978// CHECK:   ret <8 x i16> [[VABD2_I]]
// vabdq_u16: uabd.v8i16.
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}
982
983// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
984// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
985// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
986// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
987// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
988// CHECK:   [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
989// CHECK:   ret <4 x i32> [[VABD2_I]]
// vabdq_u32: uabd.v4i32.
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}
993
994// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
995// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
996// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
997// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
998// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
999// CHECK:   [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
1000// CHECK:   ret <4 x float> [[VABD2_I]]
// vabdq_f32: fabd.v4f32.
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}
1004
1005// CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1006// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1007// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1008// CHECK:   [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1009// CHECK:   [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1010// CHECK:   [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4
1011// CHECK:   ret <2 x double> [[VABD2_I]]
// vabdq_f64: fabd.v2f64.
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}
1015
1016
1017// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
1018// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1019// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1020// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1021// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1022// CHECK:   ret <8 x i8> [[VBSL2_I]]
// vbsl_s8: bitwise select — expands to (v1 & v2) | (~v1 & v3) in plain IR
// (and/xor/or), with the mask v1 taking the unsigned type per the intrinsic.
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}
1026
1027// CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
1028// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1029// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1030// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1031// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1032// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1033// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1034// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
1035// CHECK:   [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
1036// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
1037// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1038// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
1039// CHECK:   ret <8 x i8> [[TMP4]]
// vbsl_s16: same select expansion on <4 x i16>.
// NOTE(review): the return type is int8x8_t rather than int16x4_t; the CHECK
// lines above deliberately expect the trailing bitcast to <8 x i8>, so this
// appears intentional (it exercises the vector-result conversion) — confirm
// before "fixing", since changing it would invalidate the CHECK expectations.
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vbsl_s16(v1, v2, v3);
}
1043
1044// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
1045// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1046// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1047// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
1048// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1049// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1050// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1051// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
1052// CHECK:   [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
1053// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
1054// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1055// CHECK:   ret <2 x i32> [[VBSL5_I]]
// vbsl_s32: select expansion on <2 x i32>.
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}
1059
1060// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
1061// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1062// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1063// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1064// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1065// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1066// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1067// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
1068// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
1069// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1070// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1071// CHECK:   ret <1 x i64> [[VBSL5_I]]
// vbsl_s64: select expansion on <1 x i64>.
// NOTE(review): despite the _s64 suffix, all operands and the return are
// uint64x1_t (the signed variants above use signed v2/v3). Both types lower
// to <1 x i64> so the generated IR — and the CHECKs — are unaffected; flag
// for a possible cleanup rather than changing the test here.
uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}
1075
1076// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
1077// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1078// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1079// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1080// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1081// CHECK:   ret <8 x i8> [[VBSL2_I]]
// vbsl_u8: select expansion on <8 x i8> with no bitcasts needed.
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}
1085
1086// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
1087// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1088// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1089// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1090// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1091// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1092// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1093// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
1094// CHECK:   [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
1095// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
1096// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1097// CHECK:   ret <4 x i16> [[VBSL5_I]]
// vbsl_u16: select expansion on <4 x i16>.
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}
1101
1102// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
1103// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1104// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1105// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
1106// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1107// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1108// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1109// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
1110// CHECK:   [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
1111// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
1112// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1113// CHECK:   ret <2 x i32> [[VBSL5_I]]
// vbsl_u32: select expansion on <2 x i32>.
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}
1117
1118// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
1119// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1120// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
1121// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
1122// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1123// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1124// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1125// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
1126// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
1127// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1128// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1129// CHECK:   ret <1 x i64> [[VBSL5_I]]
// vbsl_u64: select expansion on <1 x i64>.
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}
1133
1134// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
1135// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
1136// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
1137// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1138// CHECK:   [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
1139// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1140// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
1141// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
1142// CHECK:   [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
1143// CHECK:   [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
1144// CHECK:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
1145// CHECK:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
1146// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
1147// CHECK:   ret <2 x float> [[TMP5]]
// vbsl_f32: select done in the <2 x i32> integer domain with float bitcasts
// at the edges, per the CHECK lines above.
// NOTE(review): v1 is float32x2_t here rather than the uint32x2_t mask type
// the other tests use — the extra float->i32 bitcast at [[TMP0]] is exactly
// what the CHECKs pin down, so this looks deliberate; confirm before changing.
float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}
1151
1152// CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 {
1153// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
1154// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
1155// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
1156// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1157// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1158// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
1159// CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
1160// CHECK:   [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
1161// CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
1162// CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
1163// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
1164// CHECK:   ret <1 x double> [[TMP4]]
// vbsl_f64: select in the <1 x i64> domain, result bitcast back to double.
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}
1168
1169// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
1170// CHECK:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
1171// CHECK:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1172// CHECK:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
1173// CHECK:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
1174// CHECK:   ret <8 x i8> [[VBSL2_I]]
// vbsl_p8: polynomial variant — identical IR to the u8/s8 select expansion.
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}
1178
1179// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
1180// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1181// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1182// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
1183// CHECK:   [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1184// CHECK:   [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1185// CHECK:   [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
1186// CHECK:   [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
1187// CHECK:   [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
1188// CHECK:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
1189// CHECK:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
1190// CHECK:   ret <4 x i16> [[VBSL5_I]]
// vbsl_p16: polynomial variant of the <4 x i16> select expansion.
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}
1194
1195// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
1196// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1197// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1198// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1199// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1200// CHECK:   ret <16 x i8> [[VBSL2_I]]
// vbslq_s8: 128-bit select expansion on <16 x i8>.
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}
1204
1205// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
1206// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1207// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1208// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1209// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1210// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1211// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1212// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
1213// CHECK:   [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1214// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
1215// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1216// CHECK:   ret <8 x i16> [[VBSL5_I]]
// vbslq_s16: select expansion on <8 x i16>.
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}
1220
1221// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
1222// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1223// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1224// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1225// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1226// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1227// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1228// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
1229// CHECK:   [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
1230// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1231// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1232// CHECK:   ret <4 x i32> [[VBSL5_I]]
// vbslq_s32: select expansion on <4 x i32>.
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}
1236
1237// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
1238// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1239// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1240// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1241// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1242// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1243// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
1244// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
1245// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
1246// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
1247// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1248// CHECK:   ret <2 x i64> [[VBSL5_I]]
// vbslq_s64: select expansion on <2 x i64>.
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}
1252
1253// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
1254// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1255// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1256// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1257// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1258// CHECK:   ret <16 x i8> [[VBSL2_I]]
// vbslq_u8: select expansion on <16 x i8>, no bitcasts needed.
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}
1262
1263// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
1264// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1265// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1266// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1267// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1268// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1269// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1270// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
1271// CHECK:   [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1272// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
1273// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1274// CHECK:   ret <8 x i16> [[VBSL5_I]]
// Unsigned 16-bit bitwise select; same IR as the signed variant.
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}
1278
1279// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
1280// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1281// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1282// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
1283// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1284// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1285// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1286// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
1287// CHECK:   [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
1288// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1289// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1290// CHECK:   ret <4 x i32> [[VBSL5_I]]
1291int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
1292  return vbslq_s32(v1, v2, v3);
1293}
1294
1295// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
1296// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1297// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1298// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
1299// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1300// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1301// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
1302// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
1303// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
1304// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
1305// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1306// CHECK:   ret <2 x i64> [[VBSL5_I]]
// Unsigned 64-bit bitwise select; same IR as the signed variant.
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}
1310
1311// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
1312// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1313// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1314// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
1315// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1316// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1317// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
1318// CHECK:   [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
1319// CHECK:   [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
1320// CHECK:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
1321// CHECK:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
1322// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
1323// CHECK:   ret <4 x float> [[TMP4]]
// Float select is done in the integer domain, then bitcast back to <4 x float>.
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}
1327
1328// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
1329// CHECK:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
1330// CHECK:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1331// CHECK:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
1332// CHECK:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
1333// CHECK:   ret <16 x i8> [[VBSL2_I]]
// Polynomial-typed select shares the plain <16 x i8> and/xor/and/or lowering.
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}
1337
1338// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
1339// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1340// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1341// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
1342// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1343// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1344// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
1345// CHECK:   [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
1346// CHECK:   [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1347// CHECK:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
1348// CHECK:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
1349// CHECK:   ret <8 x i16> [[VBSL5_I]]
// Polynomial 16-bit select; identical IR to the u16 variant.
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}
1353
1354// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
1355// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1356// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357// CHECK:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
1358// CHECK:   [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1359// CHECK:   [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1360// CHECK:   [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
1361// CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
1362// CHECK:   [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
1363// CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
1364// CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
1365// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
1366// CHECK:   ret <2 x double> [[TMP4]]
// Double select goes through <2 x i64> ops, then a bitcast to <2 x double>.
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}
1370
1371// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1372// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1373// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1374// CHECK:   [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1375// CHECK:   [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1376// CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
1377// CHECK:   [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
1378// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
1379// CHECK:   ret <2 x float> [[TMP2]]
// Lowers to a call of @llvm.aarch64.neon.frecps.v2f32 (see CHECK lines).
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
   return vrecps_f32(v1, v2);
}
1383
1384// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1385// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1386// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1387// CHECK:   [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1388// CHECK:   [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1389// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
1390// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
1391// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
1392// CHECK:   ret <4 x float> [[TMP2]]
// Quad-register form: @llvm.aarch64.neon.frecps.v4f32.
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
   return vrecpsq_f32(v1, v2);
}
1396
1397// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1398// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1399// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1400// CHECK:   [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1401// CHECK:   [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1402// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4
1403// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
1404// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double>
1405// CHECK:   ret <2 x double> [[TMP2]]
// Double-precision form: @llvm.aarch64.neon.frecps.v2f64.
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}
1409
1410// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1411// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1412// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1413// CHECK:   [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1414// CHECK:   [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1415// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
1416// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
1417// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
1418// CHECK:   ret <2 x float> [[TMP2]]
// Lowers to a call of @llvm.aarch64.neon.frsqrts.v2f32 (see CHECK lines).
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}
1422
1423// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1424// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1425// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1426// CHECK:   [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1427// CHECK:   [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1428// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
1429// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
1430// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
1431// CHECK:   ret <4 x float> [[TMP2]]
// Quad-register form: @llvm.aarch64.neon.frsqrts.v4f32.
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}
1435
1436// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1437// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1438// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1439// CHECK:   [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1440// CHECK:   [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1441// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4
1442// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
1443// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double>
1444// CHECK:   ret <2 x double> [[TMP2]]
// Double-precision form: @llvm.aarch64.neon.frsqrts.v2f64.
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}
1448
1449// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1450// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1451// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1452// CHECK:   [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1453// CHECK:   [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1454// CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
1455// CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
// Absolute compare |v1| >= |v2|: lowers to @llvm.aarch64.neon.facge.
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v1, v2);
}
1459
1460// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
1461// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1462// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1463// CHECK:   [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
1464// CHECK:   [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
1465// CHECK:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4
1466// CHECK:   ret <1 x i64> [[VCAGE_V2_I]]
// Scalar-in-vector double form: facge.v1i64.v1f64.
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
  return vcage_f64(a, b);
}
1470
1471// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1472// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1473// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1474// CHECK:   [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1475// CHECK:   [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1476// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
1477// CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
// Quad form of the absolute >= compare: facge.v4i32.v4f32.
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
  return vcageq_f32(v1, v2);
}
1481
1482// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1483// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1484// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1485// CHECK:   [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1486// CHECK:   [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1487// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4
1488// CHECK:   ret <2 x i64> [[VCAGEQ_V2_I]]
// Quad double form of the absolute >= compare: facge.v2i64.v2f64.
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}
1492
1493// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1494// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1495// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1496// CHECK:   [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1497// CHECK:   [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1498// CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
1499// CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
// Absolute compare |v1| > |v2|: lowers to @llvm.aarch64.neon.facgt.
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}
1503
1504// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
1505// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1506// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1507// CHECK:   [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
1508// CHECK:   [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
1509// CHECK:   [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4
1510// CHECK:   ret <1 x i64> [[VCAGT_V2_I]]
// Scalar-in-vector double form: facgt.v1i64.v1f64.
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}
1514
1515// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1516// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1517// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1518// CHECK:   [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1519// CHECK:   [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1520// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
1521// CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
// Quad form of the absolute > compare: facgt.v4i32.v4f32.
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}
1525
1526// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1527// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1528// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1529// CHECK:   [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1530// CHECK:   [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1531// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4
1532// CHECK:   ret <2 x i64> [[VCAGTQ_V2_I]]
// Quad double form of the absolute > compare: facgt.v2i64.v2f64.
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}
1536
1537// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1538// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1539// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1540// CHECK:   [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1541// CHECK:   [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1542// CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
1543// CHECK:   ret <2 x i32> [[VCALE_V2_I]]
// |v1| <= |v2| is implemented as facge with swapped operands
// (the CHECK lines show TMP1 feeding the first call operand).
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1548
1549// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
1550// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1551// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1552// CHECK:   [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
1553// CHECK:   [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
1554// CHECK:   [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4
1555// CHECK:   ret <1 x i64> [[VCALE_V2_I]]
// |a| <= |b| via operand-swapped facge.v1i64.v1f64.
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}
1559
1560// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1561// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1562// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1563// CHECK:   [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1564// CHECK:   [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1565// CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
1566// CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
// Quad form of |v1| <= |v2| via operand-swapped facge.v4i32.v4f32.
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1571
1572// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1573// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1574// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1575// CHECK:   [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1576// CHECK:   [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1577// CHECK:   [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4
1578// CHECK:   ret <2 x i64> [[VCALEQ_V2_I]]
// Quad double form of |v1| <= |v2| via operand-swapped facge.v2i64.v2f64.
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1583
1584// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
1585// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1586// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1587// CHECK:   [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
1588// CHECK:   [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
1589// CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
1590// CHECK:   ret <2 x i32> [[VCALT_V2_I]]
// |v1| < |v2| is implemented as facgt with swapped operands.
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1595
1596// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
1597// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1598// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1599// CHECK:   [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
1600// CHECK:   [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
1601// CHECK:   [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4
1602// CHECK:   ret <1 x i64> [[VCALT_V2_I]]
// |a| < |b| via operand-swapped facgt.v1i64.v1f64.
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}
1606
1607// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
1608// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1609// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1610// CHECK:   [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
1611// CHECK:   [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
1612// CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
1613// CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
// Quad form of |v1| < |v2| via operand-swapped facgt.v4i32.v4f32.
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1618
1619// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
1620// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1621// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1622// CHECK:   [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
1623// CHECK:   [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
1624// CHECK:   [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4
1625// CHECK:   ret <2 x i64> [[VCALTQ_V2_I]]
// Quad double form of |v1| < |v2| via operand-swapped facgt.v2i64.v2f64.
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1630
1631// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1632// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1633// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1634// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1635// CHECK:   ret <8 x i8> [[VTST_I]]
// Bit test: (v1 & v2) != 0 per lane, lowered to and + icmp ne + sext.
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}
1639
1640// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1641// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1642// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1643// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1644// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1645// CHECK:   [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
1646// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
1647// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
1648// CHECK:   ret <4 x i16> [[VTST_I]]
// 16-bit lane bit test; IR adds round-trip bitcasts through <8 x i8>.
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}
1652
1653// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
1654// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1655// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1656// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1657// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1658// CHECK:   [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
1659// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
1660// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
1661// CHECK:   ret <2 x i32> [[VTST_I]]
// 32-bit lane bit test: and + icmp ne + sext to <2 x i32>.
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}
1665
1666// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1667// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1668// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1669// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1670// CHECK:   ret <8 x i8> [[VTST_I]]
// Unsigned variant; identical IR to vtst_s8.
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}
1674
1675// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1676// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1677// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1678// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1679// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1680// CHECK:   [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
1681// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
1682// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
1683// CHECK:   ret <4 x i16> [[VTST_I]]
// Unsigned variant; identical IR to vtst_s16.
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}
1687
1688// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
1689// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1690// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1691// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
1692// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
1693// CHECK:   [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
1694// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
1695// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
1696// CHECK:   ret <2 x i32> [[VTST_I]]
// Unsigned variant; identical IR to vtst_s32.
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}
1700
1701// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1702// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1703// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1704// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1705// CHECK:   ret <16 x i8> [[VTST_I]]
// Quad-register bit test on <16 x i8>: and + icmp ne + sext.
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}
1709
1710// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1711// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1712// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1713// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1714// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1715// CHECK:   [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
1716// CHECK:   [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
1717// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
1718// CHECK:   ret <8 x i16> [[VTST_I]]
// Quad 16-bit lane bit test with bitcasts through <16 x i8>.
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}
1722
1723// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
1724// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1725// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1726// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1727// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1728// CHECK:   [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
1729// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
1730// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
1731// CHECK:   ret <4 x i32> [[VTST_I]]
// Quad 32-bit lane bit test: and + icmp ne + sext to <4 x i32>.
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}
1735
1736// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1737// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1738// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1739// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1740// CHECK:   ret <16 x i8> [[VTST_I]]
// Unsigned variant; identical IR to vtstq_s8.
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}
1744
1745// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1746// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1747// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1748// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1749// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1750// CHECK:   [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
1751// CHECK:   [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
1752// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
1753// CHECK:   ret <8 x i16> [[VTST_I]]
// Unsigned variant; identical IR to vtstq_s16.
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}
1757
1758// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
1759// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1760// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1761// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
1762// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
1763// CHECK:   [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
1764// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
1765// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
1766// CHECK:   ret <4 x i32> [[VTST_I]]
// Unsigned variant; identical IR to vtstq_s32.
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}
1770
1771// CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
1772// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1773// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1774// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1775// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1776// CHECK:   [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
1777// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
1778// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
1779// CHECK:   ret <2 x i64> [[VTST_I]]
// 64-bit lane bit test: and + icmp ne + sext to <2 x i64>.
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}
1783
1784// CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
1785// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1786// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1787// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
1788// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
1789// CHECK:   [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
1790// CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
1791// CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
1792// CHECK:   ret <2 x i64> [[VTST_I]]
1793uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
1794  return vtstq_u64(v1, v2);
1795}
1796
1797// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
1798// CHECK:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1799// CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1800// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1801// CHECK:   ret <8 x i8> [[VTST_I]]
1802uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
1803  return vtst_p8(v1, v2);
1804}
1805
1806// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 {
1807// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1808// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1809// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
1810// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
1811// CHECK:   [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
1812// CHECK:   [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
1813// CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
1814// CHECK:   ret <4 x i16> [[VTST_I]]
1815uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
1816  return vtst_p16(v1, v2);
1817}
1818
1819// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
1820// CHECK:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1821// CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1822// CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1823// CHECK:   ret <16 x i8> [[VTST_I]]
1824uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
1825  return vtstq_p8(v1, v2);
1826}
1827
1828// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 {
1829// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1830// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1831// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
1832// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
1833// CHECK:   [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
1834// CHECK:   [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
1835// CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
1836// CHECK:   ret <8 x i16> [[VTST_I]]
1837uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
1838  return vtstq_p16(v1, v2);
1839}
1840
1841// CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 {
1842// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1843// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1844// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1845// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1846// CHECK:   [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
1847// CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
1848// CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
1849// CHECK:   ret <1 x i64> [[VTST_I]]
1850uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
1851  return vtst_s64(a, b);
1852}
1853
1854// CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 {
1855// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1856// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1857// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1858// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1859// CHECK:   [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
1860// CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
1861// CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
1862// CHECK:   ret <1 x i64> [[VTST_I]]
1863uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
1864  return vtst_u64(a, b);
1865}
1866
// vceq: lane-wise equality compare. Integer/poly variants lower to
// icmp eq, float variants to fcmp oeq, each followed by a sext of the
// <N x i1> compare result to the lane-sized integer mask.
// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}


// CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}
// vcge: lane-wise greater-than-or-equal. Signed variants lower to icmp sge,
// unsigned to icmp uge, float to fcmp oge, each followed by a sext of the
// <N x i1> compare result to the lane-sized integer mask.
// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgeq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgeq_f64(v1, v2);
}
2202
// Notes about vcle:
// At the instruction level, the LE predicate is implemented as GE with the
// operands swapped; the IR checks below keep the original operand order.
// Using registers other than v0 and v1 is possible, but would be odd.
// vcle: lane-wise less-than-or-equal — icmp sle/ule or fcmp ole plus sext.
// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2365
2366
2367// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2368// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
2369// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2370// CHECK:   ret <8 x i8> [[SEXT_I]]
2371uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
2372  return vcgt_s8(v1, v2);
2373}
2374
2375// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2376// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
2377// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2378// CHECK:   ret <4 x i16> [[SEXT_I]]
2379uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
2380  return vcgt_s16(v1, v2);
2381}
2382
2383// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2384// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
2385// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2386// CHECK:   ret <2 x i32> [[SEXT_I]]
2387uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
2388  return vcgt_s32(v1, v2);
2389}
2390
2391// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
2392// CHECK:   [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
2393// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2394// CHECK:   ret <1 x i64> [[SEXT_I]]
2395uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
2396  return vcgt_s64(a, b);
2397}
2398
2399// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
2400// CHECK:   [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
2401// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2402// CHECK:   ret <1 x i64> [[SEXT_I]]
2403uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
2404  return vcgt_u64(a, b);
2405}
2406
2407// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
2408// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
2409// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2410// CHECK:   ret <2 x i32> [[SEXT_I]]
2411uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
2412  return vcgt_f32(v1, v2);
2413}
2414
2415// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
2416// CHECK:   [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
2417// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2418// CHECK:   ret <1 x i64> [[SEXT_I]]
2419uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
2420  return vcgt_f64(a, b);
2421}
2422
2423// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
2424// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
2425// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2426// CHECK:   ret <8 x i8> [[SEXT_I]]
2427uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
2428  return vcgt_u8(v1, v2);
2429}
2430
2431// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
2432// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
2433// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2434// CHECK:   ret <4 x i16> [[SEXT_I]]
2435uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
2436  return vcgt_u16(v1, v2);
2437}
2438
2439// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
2440// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
2441// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2442// CHECK:   ret <2 x i32> [[SEXT_I]]
2443uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
2444  return vcgt_u32(v1, v2);
2445}
2446
2447// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2448// CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
2449// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2450// CHECK:   ret <16 x i8> [[SEXT_I]]
2451uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
2452  return vcgtq_s8(v1, v2);
2453}
2454
2455// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
2456// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
2457// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2458// CHECK:   ret <8 x i16> [[SEXT_I]]
2459uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
2460  return vcgtq_s16(v1, v2);
2461}
2462
2463// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
2464// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
2465// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2466// CHECK:   ret <4 x i32> [[SEXT_I]]
2467uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
2468  return vcgtq_s32(v1, v2);
2469}
2470
2471// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
2472// CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
2473// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2474// CHECK:   ret <4 x i32> [[SEXT_I]]
2475uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
2476  return vcgtq_f32(v1, v2);
2477}
2478
2479// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
2480// CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
2481// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2482// CHECK:   ret <16 x i8> [[SEXT_I]]
2483uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
2484  return vcgtq_u8(v1, v2);
2485}
2486
// vcgtq_u16: unsigned > compare is emitted as icmp ugt plus sext to the <8 x i16> mask.
// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}
2494
// vcgtq_u32: unsigned > compare is emitted as icmp ugt plus sext to the <4 x i32> mask.
// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}
2502
// vcgtq_s64: signed > compare is emitted as icmp sgt plus sext to the <2 x i64> mask.
// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}
2510
// vcgtq_u64: unsigned > compare is emitted as icmp ugt plus sext to the <2 x i64> mask.
// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}
2518
// vcgtq_f64: double > compare is emitted as fcmp ogt plus sext to the <2 x i64> mask.
// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2526
2527
// Notes about vclt:
// At the instruction level the LT predicate is implemented as GT with the
// operands reversed; at the IR level checked here, the comparison is emitted
// directly (icmp slt/ult, fcmp olt) with the source operand order.
// Using registers other than v0, v1 is possible, but would be odd.
2531
// vclt_s8: signed < compare is emitted as icmp slt plus sext to the <8 x i8> mask.
// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}
2539
// vclt_s16: signed < compare is emitted as icmp slt plus sext to the <4 x i16> mask.
// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}
2547
// vclt_s32: signed < compare is emitted as icmp slt plus sext to the <2 x i32> mask.
// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}
2555
// vclt_s64: signed < compare on a 1-element vector; icmp slt plus sext to <1 x i64>.
// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}
2563
// vclt_u64: unsigned < compare on a 1-element vector; icmp ult plus sext to <1 x i64>.
// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}
2571
// vclt_f32: float < compare is emitted as fcmp olt plus sext to the <2 x i32> mask.
// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}
2579
// vclt_f64: double < compare on a 1-element vector; fcmp olt plus sext to <1 x i64>.
// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}
2587
// vclt_u8: unsigned < compare is emitted as icmp ult plus sext to the <8 x i8> mask.
// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}
2595
// vclt_u16: unsigned < compare is emitted as icmp ult plus sext to the <4 x i16> mask.
// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}
2603
// vclt_u32: unsigned < compare is emitted as icmp ult plus sext to the <2 x i32> mask.
// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}
2611
// vcltq_s8: signed < compare is emitted as icmp slt plus sext to the <16 x i8> mask.
// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}
2619
// vcltq_s16: signed < compare is emitted as icmp slt plus sext to the <8 x i16> mask.
// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}
2627
// vcltq_s32: signed < compare is emitted as icmp slt plus sext to the <4 x i32> mask.
// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}
2635
// vcltq_f32: float < compare is emitted as fcmp olt plus sext to the <4 x i32> mask.
// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}
2643
// vcltq_u8: unsigned < compare is emitted as icmp ult plus sext to the <16 x i8> mask.
// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}
2651
// vcltq_u16: unsigned < compare is emitted as icmp ult plus sext to the <8 x i16> mask.
// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}
2659
// vcltq_u32: unsigned < compare is emitted as icmp ult plus sext to the <4 x i32> mask.
// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}
2667
// vcltq_s64: signed < compare is emitted as icmp slt plus sext to the <2 x i64> mask.
// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}
2675
// vcltq_u64: unsigned < compare is emitted as icmp ult plus sext to the <2 x i64> mask.
// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}
2683
// vcltq_f64: double < compare is emitted as fcmp olt plus sext to the <2 x i64> mask.
// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2691
2692
// vhadd_s8: lowers directly to llvm.aarch64.neon.shadd.v8i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}
2699
// vhadd_s16: llvm.aarch64.neon.shadd.v4i16, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}
2712
// vhadd_s32: llvm.aarch64.neon.shadd.v2i32, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}
2725
// vhadd_u8: lowers directly to llvm.aarch64.neon.uhadd.v8i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}
2732
// vhadd_u16: llvm.aarch64.neon.uhadd.v4i16, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}
2745
// vhadd_u32: llvm.aarch64.neon.uhadd.v2i32, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}
2758
// vhaddq_s8: lowers directly to llvm.aarch64.neon.shadd.v16i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}
2765
// vhaddq_s16: llvm.aarch64.neon.shadd.v8i16, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}
2778
// vhaddq_s32: llvm.aarch64.neon.shadd.v4i32, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}
2791
// vhaddq_u8: lowers directly to llvm.aarch64.neon.uhadd.v16i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}
2798
// vhaddq_u16: llvm.aarch64.neon.uhadd.v8i16, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}
2811
// vhaddq_u32: llvm.aarch64.neon.uhadd.v4i32, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}
2824
2825
// vhsub_s8: lowers directly to llvm.aarch64.neon.shsub.v8i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}
2832
// vhsub_s16: llvm.aarch64.neon.shsub.v4i16, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}
2845
// vhsub_s32: llvm.aarch64.neon.shsub.v2i32, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}
2858
// vhsub_u8: lowers directly to llvm.aarch64.neon.uhsub.v8i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}
2865
// vhsub_u16: llvm.aarch64.neon.uhsub.v4i16, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}
2878
// vhsub_u32: llvm.aarch64.neon.uhsub.v2i32, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2891
// vhsubq_s8: lowers directly to llvm.aarch64.neon.shsub.v16i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}
2898
// vhsubq_s16: llvm.aarch64.neon.shsub.v8i16, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}
2911
// vhsubq_s32: llvm.aarch64.neon.shsub.v4i32, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}
2924
// vhsubq_u8: lowers directly to llvm.aarch64.neon.uhsub.v16i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}
2931
// vhsubq_u16: llvm.aarch64.neon.uhsub.v8i16, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}
2944
// vhsubq_u32: llvm.aarch64.neon.uhsub.v4i32, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2957
2958
// vrhadd_s8: lowers directly to llvm.aarch64.neon.srhadd.v8i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}
2965
// vrhadd_s16: llvm.aarch64.neon.srhadd.v4i16, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}
2978
// vrhadd_s32: llvm.aarch64.neon.srhadd.v2i32, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}
2991
// vrhadd_u8: lowers directly to llvm.aarch64.neon.urhadd.v8i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}
2998
// vrhadd_u16: llvm.aarch64.neon.urhadd.v4i16, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}
3011
// vrhadd_u32: llvm.aarch64.neon.urhadd.v2i32, with operands round-tripped through <8 x i8> bitcasts.
// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK:   [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}
3024
// vrhaddq_s8: lowers directly to llvm.aarch64.neon.srhadd.v16i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}
3031
// vrhaddq_s16: llvm.aarch64.neon.srhadd.v8i16, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}
3044
// vrhaddq_s32: llvm.aarch64.neon.srhadd.v4i32, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}
3057
// vrhaddq_u8: lowers directly to llvm.aarch64.neon.urhadd.v16i8 (no bitcasts for i8 vectors).
// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}
3064
// vrhaddq_u16: llvm.aarch64.neon.urhadd.v8i16, with operands round-tripped through <16 x i8> bitcasts.
// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}
3077
3078// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
3079// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
3080// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
3081// CHECK:   [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3082// CHECK:   [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3083// CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
3084// CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
3085// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
3086// CHECK:   ret <4 x i32> [[TMP2]]
// Verifies vrhaddq_u32 lowers to @llvm.aarch64.neon.urhadd.v4i32 (asserted above).
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}
3090// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
3091// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3092// CHECK:   ret <8 x i8> [[VQADD_V_I]]
// Verifies vqadd_s8 lowers to @llvm.aarch64.neon.sqadd.v8i8 (asserted above).
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}
3096
3097// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
3098// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3099// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3100// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3101// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3102// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
3103// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
3104// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
3105// CHECK:   ret <4 x i16> [[TMP2]]
// Verifies vqadd_s16 lowers to @llvm.aarch64.neon.sqadd.v4i16 (asserted above).
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}
3109
3110// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
3111// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3112// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3113// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3114// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3115// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
3116// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
3117// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
3118// CHECK:   ret <2 x i32> [[TMP2]]
// Verifies vqadd_s32 lowers to @llvm.aarch64.neon.sqadd.v2i32 (asserted above).
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}
3122
3123// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
3124// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3125// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3126// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3127// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3128// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
3129// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
3130// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
3131// CHECK:   ret <1 x i64> [[TMP2]]
// Verifies vqadd_s64 lowers to @llvm.aarch64.neon.sqadd.v1i64 (asserted above).
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}
3135
3136// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
3137// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3138// CHECK:   ret <8 x i8> [[VQADD_V_I]]
// Verifies vqadd_u8 lowers to @llvm.aarch64.neon.uqadd.v8i8 (asserted above).
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}
3142
3143// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
3144// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3145// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3146// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3147// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3148// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
3149// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
3150// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
3151// CHECK:   ret <4 x i16> [[TMP2]]
// Verifies vqadd_u16 lowers to @llvm.aarch64.neon.uqadd.v4i16 (asserted above).
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}
3155
3156// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
3157// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3158// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3159// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3160// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3161// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
3162// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
3163// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
3164// CHECK:   ret <2 x i32> [[TMP2]]
// Verifies vqadd_u32 lowers to @llvm.aarch64.neon.uqadd.v2i32 (asserted above).
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}
3168
3169// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3170// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3171// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3172// CHECK:   [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3173// CHECK:   [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3174// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
3175// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
3176// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
3177// CHECK:   ret <1 x i64> [[TMP2]]
// Verifies vqadd_u64 lowers to @llvm.aarch64.neon.uqadd.v1i64 (asserted above).
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}
3181
3182// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
3183// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3184// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
// Verifies vqaddq_s8 lowers to @llvm.aarch64.neon.sqadd.v16i8 (asserted above).
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}
3188
3189// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
3190// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3191// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3192// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3193// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3194// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
3195// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
3196// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
3197// CHECK:   ret <8 x i16> [[TMP2]]
// Verifies vqaddq_s16 lowers to @llvm.aarch64.neon.sqadd.v8i16 (asserted above).
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}
3201
3202// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
3203// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3204// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3205// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3206// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3207// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
3208// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
3209// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
3210// CHECK:   ret <4 x i32> [[TMP2]]
// Verifies vqaddq_s32 lowers to @llvm.aarch64.neon.sqadd.v4i32 (asserted above).
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}
3214
3215// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
3216// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3217// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3218// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3219// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3220// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
3221// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
3222// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
3223// CHECK:   ret <2 x i64> [[TMP2]]
// Verifies vqaddq_s64 lowers to @llvm.aarch64.neon.sqadd.v2i64 (asserted above).
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}
3227
3228// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
3229// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3230// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
// Verifies vqaddq_u8 lowers to @llvm.aarch64.neon.uqadd.v16i8 (asserted above).
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}
3234
3235// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
3236// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3237// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3238// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3239// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3240// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
3241// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
3242// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
3243// CHECK:   ret <8 x i16> [[TMP2]]
// Verifies vqaddq_u16 lowers to @llvm.aarch64.neon.uqadd.v8i16 (asserted above).
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}
3247
3248// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
3249// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3250// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3251// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3252// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3253// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
3254// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
3255// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
3256// CHECK:   ret <4 x i32> [[TMP2]]
// Verifies vqaddq_u32 lowers to @llvm.aarch64.neon.uqadd.v4i32 (asserted above).
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}
3260
3261// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
3262// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3263// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3264// CHECK:   [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3265// CHECK:   [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3266// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
3267// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
3268// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
3269// CHECK:   ret <2 x i64> [[TMP2]]
// Verifies vqaddq_u64 lowers to @llvm.aarch64.neon.uqadd.v2i64 (asserted above).
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
3273
3274
3275// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
3276// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3277// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
// Verifies vqsub_s8 lowers to @llvm.aarch64.neon.sqsub.v8i8 (asserted above).
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}
3281
3282// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
3283// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3286// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3287// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
3288// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
3289// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
3290// CHECK:   ret <4 x i16> [[TMP2]]
// Verifies vqsub_s16 lowers to @llvm.aarch64.neon.sqsub.v4i16 (asserted above).
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}
3294
3295// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
3296// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3297// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3298// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3299// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3300// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
3301// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
3302// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
3303// CHECK:   ret <2 x i32> [[TMP2]]
// Verifies vqsub_s32 lowers to @llvm.aarch64.neon.sqsub.v2i32 (asserted above).
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}
3307
3308// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
3309// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3310// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3311// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3312// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3313// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
3314// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
3315// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
3316// CHECK:   ret <1 x i64> [[TMP2]]
// Verifies vqsub_s64 lowers to @llvm.aarch64.neon.sqsub.v1i64 (asserted above).
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}
3320
3321// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
3322// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3323// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
// Verifies vqsub_u8 lowers to @llvm.aarch64.neon.uqsub.v8i8 (asserted above).
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}
3327
3328// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
3329// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3330// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3331// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3332// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3333// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
3334// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
3335// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
3336// CHECK:   ret <4 x i16> [[TMP2]]
// Verifies vqsub_u16 lowers to @llvm.aarch64.neon.uqsub.v4i16 (asserted above).
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}
3340
3341// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
3342// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3343// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3344// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3345// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3346// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
3347// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
3348// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
3349// CHECK:   ret <2 x i32> [[TMP2]]
// Verifies vqsub_u32 lowers to @llvm.aarch64.neon.uqsub.v2i32 (asserted above).
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}
3353
3354// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3355// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3356// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3357// CHECK:   [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3358// CHECK:   [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3359// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
3360// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
3361// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
3362// CHECK:   ret <1 x i64> [[TMP2]]
// Verifies vqsub_u64 lowers to @llvm.aarch64.neon.uqsub.v1i64 (asserted above).
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}
3366
3367// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
3368// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3369// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
// Verifies vqsubq_s8 lowers to @llvm.aarch64.neon.sqsub.v16i8 (asserted above).
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}
3373
3374// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
3375// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3376// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3377// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3378// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3379// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
3380// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
3381// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
3382// CHECK:   ret <8 x i16> [[TMP2]]
// Verifies vqsubq_s16 lowers to @llvm.aarch64.neon.sqsub.v8i16 (asserted above).
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}
3386
3387// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
3388// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3389// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3390// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3391// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3392// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
3393// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
3394// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
3395// CHECK:   ret <4 x i32> [[TMP2]]
// Verifies vqsubq_s32 lowers to @llvm.aarch64.neon.sqsub.v4i32 (asserted above).
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}
3399
3400// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
3401// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3402// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3403// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3404// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3405// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
3406// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
3407// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
3408// CHECK:   ret <2 x i64> [[TMP2]]
// Verifies vqsubq_s64 lowers to @llvm.aarch64.neon.sqsub.v2i64 (asserted above).
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}
3412
3413// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
3414// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3415// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
// Verifies vqsubq_u8 lowers to @llvm.aarch64.neon.uqsub.v16i8 (asserted above).
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}
3419
3420// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
3421// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3422// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3423// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3424// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3425// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
3426// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
3427// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
3428// CHECK:   ret <8 x i16> [[TMP2]]
// Verifies vqsubq_u16 lowers to @llvm.aarch64.neon.uqsub.v8i16 (asserted above).
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}
3432
3433// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
3434// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3435// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3436// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3437// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3438// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
3439// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
3440// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
3441// CHECK:   ret <4 x i32> [[TMP2]]
// Verifies vqsubq_u32 lowers to @llvm.aarch64.neon.uqsub.v4i32 (asserted above).
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}
3445
3446// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
3447// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3448// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3449// CHECK:   [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3450// CHECK:   [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3451// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
3452// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
3453// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
3454// CHECK:   ret <2 x i64> [[TMP2]]
// Verifies vqsubq_u64 lowers to @llvm.aarch64.neon.uqsub.v2i64 (asserted above).
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
3458
3459
3460// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
3461// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3462// CHECK:   ret <8 x i8> [[VSHL_V_I]]
// Verifies vshl_s8 lowers to @llvm.aarch64.neon.sshl.v8i8 (asserted above).
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}
3466
3467// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
3468// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3469// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3470// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3471// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3472// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
3473// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
3474// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
3475// CHECK:   ret <4 x i16> [[TMP2]]
// Verifies vshl_s16 lowers to @llvm.aarch64.neon.sshl.v4i16 (asserted above).
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}
3479
3480// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
3481// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3482// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3483// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3484// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3485// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
3486// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
3487// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
3488// CHECK:   ret <2 x i32> [[TMP2]]
// Verifies vshl_s32 lowers to @llvm.aarch64.neon.sshl.v2i32 (asserted above).
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}
3492
3493// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
3494// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3495// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3496// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3497// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3498// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
3499// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
3500// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
3501// CHECK:   ret <1 x i64> [[TMP2]]
// Verifies vshl_s64 lowers to @llvm.aarch64.neon.sshl.v1i64 (asserted above).
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}
3505
3506// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
3507// CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
3508// CHECK:   ret <8 x i8> [[VSHL_V_I]]
// Verifies vshl_u8 lowers to @llvm.aarch64.neon.ushl.v8i8 (asserted above).
// Note: the shift-amount operand b is signed per the vshl_u8 prototype.
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}
3512
3513// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
3514// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3515// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3516// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3517// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3518// CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
3519// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
3520// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
3521// CHECK:   ret <4 x i16> [[TMP2]]
// Verifies vshl_u16 lowers to @llvm.aarch64.neon.ushl.v4i16 (asserted above).
// Note: the shift-amount operand b is signed per the vshl_u16 prototype.
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}
3525
3526// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
3527// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3528// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3529// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3530// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3531// CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
3532// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
3533// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
3534// CHECK:   ret <2 x i32> [[TMP2]]
// Verifies vshl_u32 lowers to @llvm.aarch64.neon.ushl.v2i32 (asserted above).
// Note: the shift-amount operand b is signed per the vshl_u32 prototype.
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}
3538
3539// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
3540// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3541// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3542// CHECK:   [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3543// CHECK:   [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3544// CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
3545// CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
3546// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
3547// CHECK:   ret <1 x i64> [[TMP2]]
// Verifies vshl_u64 lowers to @llvm.aarch64.neon.ushl.v1i64 (asserted above).
// Note: the shift-amount operand b is signed per the vshl_u64 prototype.
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}
3551
3552// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
3553// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3554// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
// Verifies vshlq_s8 lowers to @llvm.aarch64.neon.sshl.v16i8 (asserted above).
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}
3558
3559// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
3560// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3561// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3562// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3563// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3564// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
3565// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3566// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
3567// CHECK:   ret <8 x i16> [[TMP2]]
// Verifies vshlq_s16 lowers to @llvm.aarch64.neon.sshl.v8i16 (asserted above).
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}
3571
3572// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
3573// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3574// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3575// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3576// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3577// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
3578// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3579// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
3580// CHECK:   ret <4 x i32> [[TMP2]]
// Verifies vshlq_s32 lowers to @llvm.aarch64.neon.sshl.v4i32 (asserted above).
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}
3584
3585// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
3586// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3587// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3588// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3589// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3590// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
3591// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3592// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
3593// CHECK:   ret <2 x i64> [[TMP2]]
// Verifies vshlq_s64 lowers to @llvm.aarch64.neon.sshl.v2i64 (asserted above).
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}
3597
3598// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
3599// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
3600// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
// Verifies vshlq_u8 lowers to @llvm.aarch64.neon.ushl.v16i8 (asserted above).
// Note: the shift-amount operand b is signed per the vshlq_u8 prototype.
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}
3604
3605// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
3606// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3607// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3608// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3609// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3610// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
3611// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
3612// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
3613// CHECK:   ret <8 x i16> [[TMP2]]
3614uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
3615  return vshlq_u16(a, b);
3616}
3617
3618// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
3619// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3620// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3621// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3622// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3623// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
3624// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
3625// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
3626// CHECK:   ret <4 x i32> [[TMP2]]
3627uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
3628  return vshlq_u32(a, b);
3629}
3630
3631// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
3632// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3633// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3634// CHECK:   [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3635// CHECK:   [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3636// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
3637// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
3638// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
3639// CHECK:   ret <2 x i64> [[TMP2]]
3640uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
3641  return vshlq_u64(a, b);
3642}
3643
3644
// vqshl: saturating variable shift left, 64-bit (d-register) forms.
// Signed lanes lower to @llvm.aarch64.neon.sqshl.*, unsigned lanes to
// @llvm.aarch64.neon.uqshl.*; the shift-count vector b is signed either way.
// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}
3736
// vqshlq: saturating variable shift left, 128-bit (q-register) forms.
// Lowers to @llvm.aarch64.neon.sqshl.* / @llvm.aarch64.neon.uqshl.* on the
// full-width vector types.
// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
3828
// vrshl: rounding variable shift left, 64-bit (d-register) forms.
// Lowers to @llvm.aarch64.neon.srshl.* (signed) / @llvm.aarch64.neon.urshl.*
// (unsigned); shift counts are signed vectors in both cases.
// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}
3920
// vrshlq: rounding variable shift left, 128-bit (q-register) forms.
// Lowers to @llvm.aarch64.neon.srshl.* / @llvm.aarch64.neon.urshl.* on the
// full-width vector types.
// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
4012
4013
// vqrshl: saturating rounding variable shift left, 64-bit (d-register) forms.
// Lowers to @llvm.aarch64.neon.sqrshl.* (signed) / @llvm.aarch64.neon.uqrshl.*
// (unsigned); shift counts are signed vectors in both cases.
// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}
4105
// vqrshlq: saturating rounding variable shift left, 128-bit (q-register)
// forms. Lowers to @llvm.aarch64.neon.sqrshl.* / @llvm.aarch64.neon.uqrshl.*.
// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}
4171
4172// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
4173// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4174// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4175// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4176// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4177// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
4178// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
4179// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
4180// CHECK:   ret <4 x i32> [[TMP2]]
4181uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
4182  return vqrshlq_u32(a, b);
4183}
4184
4185// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
4186// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4187// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4188// CHECK:   [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4189// CHECK:   [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4190// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
4191// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
4192// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
4193// CHECK:   ret <2 x i64> [[TMP2]]
4194uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
4195  return vqrshlq_u64(a, b);
4196}
4197
// Shift-left-and-insert on poly64 vectors (vsli_n_p64 / vsliq_n_p64) with an
// immediate shift of 0; the CHECK lines pin the lowering to
// @llvm.aarch64.neon.vsli.* with an i32 immediate operand.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
4219
// Element-wise maximum, 64-bit (d-register) forms (vmax_*). CHECK lines pin
// the lowering to @llvm.aarch64.neon.smax.* / .umax.* / .fmax.* per element
// type; i8 vectors lower to a direct call, wider elements go through
// <8 x i8> bitcasts.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
// CHECK:   ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}
4288
// Element-wise maximum, 128-bit (q-register) forms (vmaxq_*), including the
// AArch64-only f64 variant. Same lowering pattern as the d-register forms,
// but through <16 x i8> bitcasts.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK:   ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK:   ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4
// CHECK:   ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4
// CHECK:   ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}
4368
4369
// Element-wise minimum, 64-bit (d-register) forms (vmin_*), mirroring the
// vmax_* tests above but lowering to @llvm.aarch64.neon.smin.* / .umin.* /
// .fmin.*.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4
// CHECK:   ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}
4438
// Element-wise minimum, 128-bit (q-register) forms (vminq_*), including the
// AArch64-only f64 variant; mirrors the vmaxq_* tests above.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK:   ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK:   ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
// CHECK:   ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4
// CHECK:   ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
4518
// Floating-point maxNum forms (vmaxnm*): lowering pinned to
// @llvm.aarch64.neon.fmaxnm.*, distinct from the plain fmax intrinsics above.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]]) #4
// CHECK:   ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4
// CHECK:   ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4
// CHECK:   ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}
4551
// Floating-point minNum forms (vminnm*): lowering pinned to
// @llvm.aarch64.neon.fminnm.*, mirroring the vmaxnm* tests above.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]]) #4
// CHECK:   ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4
// CHECK:   ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK:   [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4
// CHECK:   ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
4584
// Pairwise maximum, 64-bit (d-register) forms (vpmax_*): lowering pinned to
// @llvm.aarch64.neon.smaxp.* / .umaxp.* / .fmaxp.*.
// NOTE(review): CHECK lines are generated from clang IR output — regenerate,
// don't hand-edit.
// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK:   ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
// CHECK:   ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
4653
4654// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
4655// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
4656// CHECK:   ret <16 x i8> [[VPMAX_I]]
4657int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
4658  return vpmaxq_s8(a, b);
4659}
4660
4661// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
4662// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4663// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4664// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4665// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4666// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
4667// CHECK:   ret <8 x i16> [[VPMAX2_I]]
4668int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
4669  return vpmaxq_s16(a, b);
4670}
4671
4672// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
4673// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4674// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4675// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4676// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4677// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
4678// CHECK:   ret <4 x i32> [[VPMAX2_I]]
4679int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
4680  return vpmaxq_s32(a, b);
4681}
4682
4683// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
4684// CHECK:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
4685// CHECK:   ret <16 x i8> [[VPMAX_I]]
4686uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
4687  return vpmaxq_u8(a, b);
4688}
4689
4690// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
4691// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4692// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4693// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4694// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4695// CHECK:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
4696// CHECK:   ret <8 x i16> [[VPMAX2_I]]
4697uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
4698  return vpmaxq_u16(a, b);
4699}
4700
4701// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
4702// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4703// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4704// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4705// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4706// CHECK:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
4707// CHECK:   ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  // Pairwise unsigned max; CHECK expects bitcasts around llvm.aarch64.neon.umaxp.v4i32.
  return vpmaxq_u32(a, b);
}
4711
4712// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
4713// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4714// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4715// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
4716// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
4717// CHECK:   [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4
4718// CHECK:   ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  // Pairwise float max; CHECK expects bitcasts around llvm.aarch64.neon.fmaxp.v4f32.
  return vpmaxq_f32(a, b);
}
4722
4723// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
4724// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4725// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4726// CHECK:   [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
4727// CHECK:   [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
4728// CHECK:   [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4
4729// CHECK:   ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  // Pairwise double max; CHECK expects bitcasts around llvm.aarch64.neon.fmaxp.v2f64.
  return vpmaxq_f64(a, b);
}
4733
4734// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
4735// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
4736// CHECK:   ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  // Pairwise signed min; CHECK expects a direct llvm.aarch64.neon.sminp.v8i8 call.
  return vpmin_s8(a, b);
}
4740
4741// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
4742// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4743// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4744// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4745// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4746// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
4747// CHECK:   ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  // Pairwise signed min; CHECK expects bitcasts around llvm.aarch64.neon.sminp.v4i16.
  return vpmin_s16(a, b);
}
4751
4752// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
4753// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4754// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4755// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4756// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4757// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
4758// CHECK:   ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  // Pairwise signed min; CHECK expects bitcasts around llvm.aarch64.neon.sminp.v2i32.
  return vpmin_s32(a, b);
}
4762
4763// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
4764// CHECK:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
4765// CHECK:   ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  // Pairwise unsigned min; CHECK expects a direct llvm.aarch64.neon.uminp.v8i8 call.
  return vpmin_u8(a, b);
}
4769
4770// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
4771// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4772// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4773// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4774// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4775// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
4776// CHECK:   ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  // Pairwise unsigned min; CHECK expects bitcasts around llvm.aarch64.neon.uminp.v4i16.
  return vpmin_u16(a, b);
}
4780
4781// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
4782// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4783// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4784// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4785// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4786// CHECK:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
4787// CHECK:   ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  // Pairwise unsigned min; CHECK expects bitcasts around llvm.aarch64.neon.uminp.v2i32.
  return vpmin_u32(a, b);
}
4791
4792// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
4793// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4794// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4795// CHECK:   [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
4796// CHECK:   [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
4797// CHECK:   [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
4798// CHECK:   ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  // Pairwise float min; CHECK expects bitcasts around llvm.aarch64.neon.fminp.v2f32.
  return vpmin_f32(a, b);
}
4802
4803// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
4804// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
4805// CHECK:   ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  // Pairwise signed min (128-bit); CHECK expects a direct llvm.aarch64.neon.sminp.v16i8 call.
  return vpminq_s8(a, b);
}
4809
4810// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
4811// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4812// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4813// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4814// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4815// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
4816// CHECK:   ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  // Pairwise signed min (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.sminp.v8i16.
  return vpminq_s16(a, b);
}
4820
4821// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
4822// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4823// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4824// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4825// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4826// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
4827// CHECK:   ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  // Pairwise signed min (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.sminp.v4i32.
  return vpminq_s32(a, b);
}
4831
4832// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
4833// CHECK:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
4834// CHECK:   ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  // Pairwise unsigned min (128-bit); CHECK expects a direct llvm.aarch64.neon.uminp.v16i8 call.
  return vpminq_u8(a, b);
}
4838
4839// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
4840// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4841// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4842// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4843// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4844// CHECK:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
4845// CHECK:   ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  // Pairwise unsigned min (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.uminp.v8i16.
  return vpminq_u16(a, b);
}
4849
4850// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
4851// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4852// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4853// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4854// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4855// CHECK:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
4856// CHECK:   ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  // Pairwise unsigned min (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.uminp.v4i32.
  return vpminq_u32(a, b);
}
4860
4861// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 {
4862// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4863// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4864// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
4865// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
4866// CHECK:   [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4
4867// CHECK:   ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  // Pairwise float min (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.fminp.v4f32.
  return vpminq_f32(a, b);
}
4871
4872// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 {
4873// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4874// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4875// CHECK:   [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
4876// CHECK:   [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
4877// CHECK:   [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4
4878// CHECK:   ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  // Pairwise double min (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.fminp.v2f64.
  return vpminq_f64(a, b);
}
4882
4883// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
4884// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4885// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4886// CHECK:   [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
4887// CHECK:   [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
4888// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4
4889// CHECK:   ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  // Pairwise maxNum (NaN-propagation-free max); CHECK expects llvm.aarch64.neon.fmaxnmp.v2f32.
  return vpmaxnm_f32(a, b);
}
4893
4894// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
4895// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4896// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4897// CHECK:   [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
4898// CHECK:   [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
4899// CHECK:   [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4
4900// CHECK:   ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  // Pairwise maxNum (128-bit); CHECK expects llvm.aarch64.neon.fmaxnmp.v4f32.
  return vpmaxnmq_f32(a, b);
}
4904
4905// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
4906// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4907// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4908// CHECK:   [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
4909// CHECK:   [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
4910// CHECK:   [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4
4911// CHECK:   ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  // Pairwise maxNum on doubles; CHECK expects llvm.aarch64.neon.fmaxnmp.v2f64.
  return vpmaxnmq_f64(a, b);
}
4915
4916// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
4917// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4918// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4919// CHECK:   [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
4920// CHECK:   [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
4921// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4
4922// CHECK:   ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  // Pairwise minNum; CHECK expects llvm.aarch64.neon.fminnmp.v2f32.
  return vpminnm_f32(a, b);
}
4926
4927// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
4928// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4929// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4930// CHECK:   [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
4931// CHECK:   [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
4932// CHECK:   [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4
4933// CHECK:   ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  // Pairwise minNum (128-bit); CHECK expects llvm.aarch64.neon.fminnmp.v4f32.
  return vpminnmq_f32(a, b);
}
4937
4938// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
4939// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4940// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4941// CHECK:   [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
4942// CHECK:   [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
4943// CHECK:   [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4
4944// CHECK:   ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  // Pairwise minNum on doubles; CHECK expects llvm.aarch64.neon.fminnmp.v2f64.
  return vpminnmq_f64(a, b);
}
4948
4949// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
4950// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
4951// CHECK:   ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  // Pairwise add; CHECK expects a direct llvm.aarch64.neon.addp.v8i8 call.
  return vpadd_s8(a, b);
}
4955
4956// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
4957// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4958// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4959// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4960// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4961// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
4962// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4963// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
4964// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  // Pairwise add; CHECK expects bitcasts around llvm.aarch64.neon.addp.v4i16.
  return vpadd_s16(a, b);
}
4968
4969// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
4970// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4971// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4972// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4973// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4974// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
4975// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
4976// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
4977// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  // Pairwise add; CHECK expects bitcasts around llvm.aarch64.neon.addp.v2i32.
  return vpadd_s32(a, b);
}
4981
4982// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
4983// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
4984// CHECK:   ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  // Unsigned variant shares the sign-agnostic addp intrinsic (llvm.aarch64.neon.addp.v8i8).
  return vpadd_u8(a, b);
}
4988
4989// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
4990// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4991// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4992// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4993// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4994// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
4995// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
4996// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
4997// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  // Unsigned variant shares the sign-agnostic addp intrinsic (llvm.aarch64.neon.addp.v4i16).
  return vpadd_u16(a, b);
}
5001
5002// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
5003// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5004// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5005// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5006// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5007// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
5008// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
5009// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
5010// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  // Unsigned variant shares the sign-agnostic addp intrinsic (llvm.aarch64.neon.addp.v2i32).
  return vpadd_u32(a, b);
}
5014
5015// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
5016// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
5017// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
5018// CHECK:   [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
5019// CHECK:   [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
5020// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
5021// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
5022// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
5023// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  // Float pairwise add; CHECK expects llvm.aarch64.neon.addp.v2f32.
  return vpadd_f32(a, b);
}
5027
5028// CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
5029// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
5030// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  // Pairwise add (128-bit); CHECK expects a direct llvm.aarch64.neon.addp.v16i8 call.
  return vpaddq_s8(a, b);
}
5034
5035// CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5036// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5037// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5038// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5039// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5040// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
5041// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
5042// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
5043// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  // Pairwise add (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.addp.v8i16.
  return vpaddq_s16(a, b);
}
5047
5048// CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5049// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5050// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5051// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5052// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5053// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
5054// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
5055// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
5056// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  // Pairwise add (128-bit); CHECK expects bitcasts around llvm.aarch64.neon.addp.v4i32.
  return vpaddq_s32(a, b);
}
5060
5061// CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
5062// CHECK:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
5063// CHECK:   ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  // Unsigned variant shares the sign-agnostic addp intrinsic (llvm.aarch64.neon.addp.v16i8).
  return vpaddq_u8(a, b);
}
5067
5068// CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
5069// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5070// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5071// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5072// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5073// CHECK:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
5074// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
5075// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
5076// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  // Unsigned variant shares the sign-agnostic addp intrinsic (llvm.aarch64.neon.addp.v8i16).
  return vpaddq_u16(a, b);
}
5080
5081// CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
5082// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5083// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5084// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5085// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5086// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
5087// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
5088// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
5089// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  // Unsigned variant shares the sign-agnostic addp intrinsic (llvm.aarch64.neon.addp.v4i32).
  return vpaddq_u32(a, b);
}
5093
5094// CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
5095// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
5096// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
5097// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
5098// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
5099// CHECK:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4
5100// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
5101// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float>
5102// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  // Float pairwise add (128-bit); CHECK expects llvm.aarch64.neon.addp.v4f32.
  return vpaddq_f32(a, b);
}
5106
5107// CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 {
5108// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
5109// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
5110// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
5111// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
5112// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4
5113// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
5114// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double>
5115// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  // Double pairwise add; CHECK expects llvm.aarch64.neon.addp.v2f64.
  return vpaddq_f64(a, b);
}
5119
5120// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
5121// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5122// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5123// CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5124// CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5125// CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
5126// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
5127// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
5128// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  // Saturating doubling multiply-high; CHECK expects llvm.aarch64.neon.sqdmulh.v4i16.
  return vqdmulh_s16(a, b);
}
5132
5133// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
5134// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5135// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5136// CHECK:   [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5137// CHECK:   [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5138// CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
5139// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
5140// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
5141// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  // Saturating doubling multiply-high; CHECK expects llvm.aarch64.neon.sqdmulh.v2i32.
  return vqdmulh_s32(a, b);
}
5145
5146// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5147// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5148// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5149// CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5150// CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5151// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
5152// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
5153// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
5154// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  // Saturating doubling multiply-high (128-bit); CHECK expects llvm.aarch64.neon.sqdmulh.v8i16.
  return vqdmulhq_s16(a, b);
}
5158
5159// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5160// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5161// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5162// CHECK:   [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5163// CHECK:   [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
5165// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
5166// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
5167// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  // Saturating doubling multiply-high (128-bit); CHECK expects llvm.aarch64.neon.sqdmulh.v4i32.
  return vqdmulhq_s32(a, b);
}
5171
5172// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
5173// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5174// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5175// CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5176// CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5177// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
5178// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
5179// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
5180// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  // Rounding variant of sqdmulh; CHECK expects llvm.aarch64.neon.sqrdmulh.v4i16.
  return vqrdmulh_s16(a, b);
}
5184
5185// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
5186// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5187// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5188// CHECK:   [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5189// CHECK:   [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5190// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
5191// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
5192// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
5193// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  // Rounding variant of sqdmulh; CHECK expects llvm.aarch64.neon.sqrdmulh.v2i32.
  return vqrdmulh_s32(a, b);
}
5197
5198// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5199// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5200// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5201// CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5202// CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5203// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
5204// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
5205// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
5206// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  // Rounding variant of sqdmulh (128-bit); CHECK expects llvm.aarch64.neon.sqrdmulh.v8i16.
  return vqrdmulhq_s16(a, b);
}
5210
5211// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5212// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5213// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5214// CHECK:   [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5215// CHECK:   [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5216// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
5217// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
5218// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
5219// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  // Rounding variant of sqdmulh (128-bit); CHECK expects llvm.aarch64.neon.sqrdmulh.v4i32.
  return vqrdmulhq_s32(a, b);
}
5223
5224// CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 {
5225// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
5226// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
5227// CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
5228// CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
5229// CHECK:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4
5230// CHECK:   ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
  // Extended multiply (FMULX); CHECK expects llvm.aarch64.neon.fmulx.v2f32.
  return vmulx_f32(a, b);
}
5234
5235// CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 {
5236// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
5237// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
5238// CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
5239// CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
5240// CHECK:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4
5241// CHECK:   ret <4 x float> [[VMULX2_I]]
5242float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
5243  return vmulxq_f32(a, b);
5244}
5245
5246// CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 {
5247// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
5248// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
5249// CHECK:   [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
5250// CHECK:   [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
5251// CHECK:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4
5252// CHECK:   ret <2 x double> [[VMULX2_I]]
5253float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
5254  return vmulxq_f64(a, b);
5255}
5256
// Signed shift-left by immediate (vshl_n / vshlq_n): lowers to a plain
// 'shl' with a splat constant. 8-bit variants shift the argument directly;
// wider element types first round-trip through an i8 vector bitcast.
// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 3);
}
5315
5316// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
5317// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5318// CHECK:   ret <8 x i8> [[VSHL_N]]
5319int8x8_t test_vshl_n_u8(int8x8_t a) {
5320  return vshl_n_u8(a, 3);
5321}
5322
5323// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
5324// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5325// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5326// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
5327// CHECK:   ret <4 x i16> [[VSHL_N]]
5328int16x4_t test_vshl_n_u16(int16x4_t a) {
5329  return vshl_n_u16(a, 3);
5330}
5331
5332// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
5333// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5334// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5335// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
5336// CHECK:   ret <2 x i32> [[VSHL_N]]
5337int32x2_t test_vshl_n_u32(int32x2_t a) {
5338  return vshl_n_u32(a, 3);
5339}
5340
5341// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
5342// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5343// CHECK:   ret <16 x i8> [[VSHL_N]]
5344int8x16_t test_vshlq_n_u8(int8x16_t a) {
5345  return vshlq_n_u8(a, 3);
5346}
5347
5348// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
5349// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5350// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5351// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5352// CHECK:   ret <8 x i16> [[VSHL_N]]
5353int16x8_t test_vshlq_n_u16(int16x8_t a) {
5354  return vshlq_n_u16(a, 3);
5355}
5356
5357// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
5358// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5359// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5360// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
5361// CHECK:   ret <4 x i32> [[VSHL_N]]
5362int32x4_t test_vshlq_n_u32(int32x4_t a) {
5363  return vshlq_n_u32(a, 3);
5364}
5365
5366// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
5367// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5368// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5369// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
5370// CHECK:   ret <2 x i64> [[VSHL_N]]
5371int64x2_t test_vshlq_n_u64(int64x2_t a) {
5372  return vshlq_n_u64(a, 3);
5373}
5374
// Signed shift-right by immediate (vshr_n / vshrq_n): lowers to 'ashr'
// (arithmetic shift, sign-preserving) with a splat constant; wider
// element types round-trip through an i8 vector bitcast first.
// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK:   ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK:   ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 3);
}
5433
5434// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
5435// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5436// CHECK:   ret <8 x i8> [[VSHR_N]]
5437int8x8_t test_vshr_n_u8(int8x8_t a) {
5438  return vshr_n_u8(a, 3);
5439}
5440
5441// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
5442// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5444// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
5445// CHECK:   ret <4 x i16> [[VSHR_N]]
5446int16x4_t test_vshr_n_u16(int16x4_t a) {
5447  return vshr_n_u16(a, 3);
5448}
5449
5450// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
5451// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5452// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5453// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
5454// CHECK:   ret <2 x i32> [[VSHR_N]]
5455int32x2_t test_vshr_n_u32(int32x2_t a) {
5456  return vshr_n_u32(a, 3);
5457}
5458
5459// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
5460// CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5461// CHECK:   ret <16 x i8> [[VSHR_N]]
5462int8x16_t test_vshrq_n_u8(int8x16_t a) {
5463  return vshrq_n_u8(a, 3);
5464}
5465
5466// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
5467// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5468// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5469// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5470// CHECK:   ret <8 x i16> [[VSHR_N]]
5471int16x8_t test_vshrq_n_u16(int16x8_t a) {
5472  return vshrq_n_u16(a, 3);
5473}
5474
5475// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
5476// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5477// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5478// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
5479// CHECK:   ret <4 x i32> [[VSHR_N]]
5480int32x4_t test_vshrq_n_u32(int32x4_t a) {
5481  return vshrq_n_u32(a, 3);
5482}
5483
5484// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
5485// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5486// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5487// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
5488// CHECK:   ret <2 x i64> [[VSHR_N]]
5489int64x2_t test_vshrq_n_u64(int64x2_t a) {
5490  return vshrq_n_u64(a, 3);
5491}
5492
// Signed shift-right-and-accumulate (vsra_n / vsraq_n): b is arithmetic-
// shifted ('ashr') by the immediate and added to a; no target intrinsic
// is needed, plain IR suffices.
// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}
5568
5569// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
5570// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5571// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
5572// CHECK:   ret <8 x i8> [[TMP0]]
5573int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
5574  return vsra_n_u8(a, b, 3);
5575}
5576
5577// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
5578// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5579// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5580// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5581// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5582// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
5583// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
5584// CHECK:   ret <4 x i16> [[TMP4]]
5585int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
5586  return vsra_n_u16(a, b, 3);
5587}
5588
5589// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
5590// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5591// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5592// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5593// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5594// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
5595// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
5596// CHECK:   ret <2 x i32> [[TMP4]]
5597int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
5598  return vsra_n_u32(a, b, 3);
5599}
5600
5601// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
5602// CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
5603// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
5604// CHECK:   ret <16 x i8> [[TMP0]]
5605int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
5606  return vsraq_n_u8(a, b, 3);
5607}
5608
5609// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
5610// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5611// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5612// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5613// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5614// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5615// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
5616// CHECK:   ret <8 x i16> [[TMP4]]
5617int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
5618  return vsraq_n_u16(a, b, 3);
5619}
5620
5621// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
5622// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5623// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5624// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5625// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5626// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
5627// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
5628// CHECK:   ret <4 x i32> [[TMP4]]
5629int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
5630  return vsraq_n_u32(a, b, 3);
5631}
5632
5633// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
5634// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5635// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5636// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5637// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5638// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
5639// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
5640// CHECK:   ret <2 x i64> [[TMP4]]
5641int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
5642  return vsraq_n_u64(a, b, 3);
5643}
5644
// Signed rounding shift-right by immediate (vrshr_n / vrshrq_n): there is
// no IR rounding-shift, so it lowers to the srshl intrinsic with a
// negated (splat -3) shift amount, i.e. rounding shift left by -n.
// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}
5703
5704// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
5705// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5706// CHECK:   ret <8 x i8> [[VRSHR_N]]
5707int8x8_t test_vrshr_n_u8(int8x8_t a) {
5708  return vrshr_n_u8(a, 3);
5709}
5710
5711// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
5712// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5713// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5714// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5715// CHECK:   ret <4 x i16> [[VRSHR_N1]]
5716int16x4_t test_vrshr_n_u16(int16x4_t a) {
5717  return vrshr_n_u16(a, 3);
5718}
5719
5720// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
5721// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5722// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5723// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5724// CHECK:   ret <2 x i32> [[VRSHR_N1]]
5725int32x2_t test_vrshr_n_u32(int32x2_t a) {
5726  return vrshr_n_u32(a, 3);
5727}
5728
5729// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
5730// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5731// CHECK:   ret <16 x i8> [[VRSHR_N]]
5732int8x16_t test_vrshrq_n_u8(int8x16_t a) {
5733  return vrshrq_n_u8(a, 3);
5734}
5735
5736// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
5737// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5738// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5739// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5740// CHECK:   ret <8 x i16> [[VRSHR_N1]]
5741int16x8_t test_vrshrq_n_u16(int16x8_t a) {
5742  return vrshrq_n_u16(a, 3);
5743}
5744
5745// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
5746// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5747// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5748// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5749// CHECK:   ret <4 x i32> [[VRSHR_N1]]
5750int32x4_t test_vrshrq_n_u32(int32x4_t a) {
5751  return vrshrq_n_u32(a, 3);
5752}
5753
5754// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
5755// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5756// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5757// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5758// CHECK:   ret <2 x i64> [[VRSHR_N1]]
5759int64x2_t test_vrshrq_n_u64(int64x2_t a) {
5760  return vrshrq_n_u64(a, 3);
5761}
5762
// Signed rounding shift-right-and-accumulate (vrsra_n / vrsraq_n):
// b is rounding-shifted via srshl with a negated shift amount, then
// added to a with a plain 'add'.
// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}
5838
5839// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
5840// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5841// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5842// CHECK:   ret <8 x i8> [[TMP0]]
5843int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
5844  return vrsra_n_u8(a, b, 3);
5845}
5846
5847// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
5848// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5849// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5850// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5851// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5852// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5853// CHECK:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5854// CHECK:   ret <4 x i16> [[TMP3]]
5855int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
5856  return vrsra_n_u16(a, b, 3);
5857}
5858
5859// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
5860// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5861// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5862// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5863// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5864// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5865// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5866// CHECK:   ret <2 x i32> [[TMP3]]
5867int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
5868  return vrsra_n_u32(a, b, 3);
5869}
5870
5871// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
5872// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5873// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5874// CHECK:   ret <16 x i8> [[TMP0]]
5875int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
5876  return vrsraq_n_u8(a, b, 3);
5877}
5878
5879// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
5880// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5881// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5882// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5883// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5884// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5885// CHECK:   [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5886// CHECK:   ret <8 x i16> [[TMP3]]
5887int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
5888  return vrsraq_n_u16(a, b, 3);
5889}
5890
5891// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
5892// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5893// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5894// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5895// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5896// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5897// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5898// CHECK:   ret <4 x i32> [[TMP3]]
5899int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
5900  return vrsraq_n_u32(a, b, 3);
5901}
5902
5903// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
5904// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5905// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5906// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5907// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5908// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5909// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5910// CHECK:   ret <2 x i64> [[TMP3]]
5911int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
5912  return vrsraq_n_u64(a, b, 3);
5913}
5914
5915// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
5916// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5917// CHECK:   ret <8 x i8> [[VSRI_N]]
// vsri_n_s8: CHECK above expects @llvm.aarch64.neon.vsri.v8i8 with shift 3.
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}
5921
5922// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
5923// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5924// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5925// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5926// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5927// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5928// CHECK:   ret <4 x i16> [[VSRI_N2]]
// vsri_n_s16: CHECK above expects @llvm.aarch64.neon.vsri.v4i16 with shift 3.
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}
5932
5933// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
5934// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5935// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5936// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5937// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5938// CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5939// CHECK:   ret <2 x i32> [[VSRI_N2]]
// vsri_n_s32: CHECK above expects @llvm.aarch64.neon.vsri.v2i32 with shift 3.
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}
5943
5944// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
5945// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5946// CHECK:   ret <16 x i8> [[VSRI_N]]
// vsriq_n_s8: CHECK above expects @llvm.aarch64.neon.vsri.v16i8 with shift 3.
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}
5950
5951// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
5952// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5953// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5954// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5955// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5956// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5957// CHECK:   ret <8 x i16> [[VSRI_N2]]
// vsriq_n_s16: CHECK above expects @llvm.aarch64.neon.vsri.v8i16 with shift 3.
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}
5961
5962// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
5963// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5964// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5965// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5966// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5967// CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5968// CHECK:   ret <4 x i32> [[VSRI_N2]]
// vsriq_n_s32: CHECK above expects @llvm.aarch64.neon.vsri.v4i32 with shift 3.
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}
5972
5973// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
5974// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5975// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5976// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5977// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5978// CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5979// CHECK:   ret <2 x i64> [[VSRI_N2]]
// vsriq_n_s64: CHECK above expects @llvm.aarch64.neon.vsri.v2i64 with shift 3.
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}
5983
5984// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
5985// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5986// CHECK:   ret <8 x i8> [[VSRI_N]]
5987int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
5988  return vsri_n_u8(a, b, 3);
5989}
5990
5991// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
5992// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5993// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5994// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5995// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5996// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5997// CHECK:   ret <4 x i16> [[VSRI_N2]]
5998int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
5999  return vsri_n_u16(a, b, 3);
6000}
6001
6002// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
6003// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6004// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6005// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6006// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
6007// CHECK:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
6008// CHECK:   ret <2 x i32> [[VSRI_N2]]
6009int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
6010  return vsri_n_u32(a, b, 3);
6011}
6012
6013// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
6014// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
6015// CHECK:   ret <16 x i8> [[VSRI_N]]
6016int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
6017  return vsriq_n_u8(a, b, 3);
6018}
6019
6020// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
6021// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6022// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6023// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6024// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
6025// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
6026// CHECK:   ret <8 x i16> [[VSRI_N2]]
6027int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
6028  return vsriq_n_u16(a, b, 3);
6029}
6030
6031// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
6032// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6033// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6034// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6035// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
6036// CHECK:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
6037// CHECK:   ret <4 x i32> [[VSRI_N2]]
6038int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
6039  return vsriq_n_u32(a, b, 3);
6040}
6041
6042// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
6043// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6044// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6045// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6046// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
6047// CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
6048// CHECK:   ret <2 x i64> [[VSRI_N2]]
6049int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
6050  return vsriq_n_u64(a, b, 3);
6051}
6052
6053// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
6054// CHECK:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
6055// CHECK:   ret <8 x i8> [[VSRI_N]]
// vsri_n_p8: CHECK above expects @llvm.aarch64.neon.vsri.v8i8 with shift 3.
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}
6059
6060// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
6061// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6062// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6063// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6064// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
6065// CHECK:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
6066// CHECK:   ret <4 x i16> [[VSRI_N2]]
// vsri_n_p16 with the maximum shift (15): CHECK above expects
// @llvm.aarch64.neon.vsri.v4i16 with shift 15.
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}
6070
6071// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
6072// CHECK:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
6073// CHECK:   ret <16 x i8> [[VSRI_N]]
// vsriq_n_p8: CHECK above expects @llvm.aarch64.neon.vsri.v16i8 with shift 3.
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}
6077
6078// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
6079// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6080// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6081// CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6082// CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
6083// CHECK:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
6084// CHECK:   ret <8 x i16> [[VSRI_N2]]
// vsriq_n_p16 with the maximum shift (15): CHECK above expects
// @llvm.aarch64.neon.vsri.v8i16 with shift 15.
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
6088
6089// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
6090// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
6091// CHECK:   ret <8 x i8> [[VSLI_N]]
// vsli_n_s8: CHECK above expects @llvm.aarch64.neon.vsli.v8i8 with shift 3.
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}
6095
6096// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
6097// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6098// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6099// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6100// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
6101// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
6102// CHECK:   ret <4 x i16> [[VSLI_N2]]
// vsli_n_s16: CHECK above expects @llvm.aarch64.neon.vsli.v4i16 with shift 3.
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}
6106
6107// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
6108// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6109// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6110// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6111// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
6112// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
6113// CHECK:   ret <2 x i32> [[VSLI_N2]]
// vsli_n_s32: CHECK above expects @llvm.aarch64.neon.vsli.v2i32 with shift 3.
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}
6117
6118// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
6119// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
6120// CHECK:   ret <16 x i8> [[VSLI_N]]
// vsliq_n_s8: CHECK above expects @llvm.aarch64.neon.vsli.v16i8 with shift 3.
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}
6124
6125// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
6126// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6127// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6128// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6129// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
6130// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
6131// CHECK:   ret <8 x i16> [[VSLI_N2]]
// vsliq_n_s16: CHECK above expects @llvm.aarch64.neon.vsli.v8i16 with shift 3.
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}
6135
6136// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
6137// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6138// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6139// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6140// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
6141// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
6142// CHECK:   ret <4 x i32> [[VSLI_N2]]
// vsliq_n_s32: CHECK above expects @llvm.aarch64.neon.vsli.v4i32 with shift 3.
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}
6146
6147// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
6148// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6149// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6150// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6151// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
6152// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
6153// CHECK:   ret <2 x i64> [[VSLI_N2]]
// vsliq_n_s64: CHECK above expects @llvm.aarch64.neon.vsli.v2i64 with shift 3.
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}
6157
6158// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
6159// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
6160// CHECK:   ret <8 x i8> [[VSLI_N]]
// vsli_n_u8: CHECK above expects @llvm.aarch64.neon.vsli.v8i8 with shift 3.
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}
6164
6165// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
6166// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6167// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6168// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6169// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
6170// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
6171// CHECK:   ret <4 x i16> [[VSLI_N2]]
// vsli_n_u16: CHECK above expects @llvm.aarch64.neon.vsli.v4i16 with shift 3.
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}
6175
6176// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
6177// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6178// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6179// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6180// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
6181// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
6182// CHECK:   ret <2 x i32> [[VSLI_N2]]
// vsli_n_u32: CHECK above expects @llvm.aarch64.neon.vsli.v2i32 with shift 3.
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}
6186
6187// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
6188// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
6189// CHECK:   ret <16 x i8> [[VSLI_N]]
// vsliq_n_u8: CHECK above expects @llvm.aarch64.neon.vsli.v16i8 with shift 3.
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}
6193
6194// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
6195// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6196// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6197// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6198// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
6199// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
6200// CHECK:   ret <8 x i16> [[VSLI_N2]]
// vsliq_n_u16: CHECK above expects @llvm.aarch64.neon.vsli.v8i16 with shift 3.
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}
6204
6205// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
6206// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6207// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6208// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6209// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
6210// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
6211// CHECK:   ret <4 x i32> [[VSLI_N2]]
// vsliq_n_u32: CHECK above expects @llvm.aarch64.neon.vsli.v4i32 with shift 3.
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}
6215
6216// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
6217// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6218// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6219// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6220// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
6221// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
6222// CHECK:   ret <2 x i64> [[VSLI_N2]]
// vsliq_n_u64: CHECK above expects @llvm.aarch64.neon.vsli.v2i64 with shift 3.
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}
6226
6227// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
6228// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
6229// CHECK:   ret <8 x i8> [[VSLI_N]]
// vsli_n_p8: CHECK above expects @llvm.aarch64.neon.vsli.v8i8 with shift 3.
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}
6233
6234// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
6235// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6236// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6237// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6238// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
6239// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
6240// CHECK:   ret <4 x i16> [[VSLI_N2]]
// vsli_n_p16 with the maximum shift (15): CHECK above expects
// @llvm.aarch64.neon.vsli.v4i16 with shift 15.
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}
6244
6245// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
6246// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
6247// CHECK:   ret <16 x i8> [[VSLI_N]]
// vsliq_n_p8: CHECK above expects @llvm.aarch64.neon.vsli.v16i8 with shift 3.
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}
6251
6252// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
6253// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6254// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6255// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6256// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
6257// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
6258// CHECK:   ret <8 x i16> [[VSLI_N2]]
// vsliq_n_p16 with the maximum shift (15): CHECK above expects
// @llvm.aarch64.neon.vsli.v8i16 with shift 15.
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
6262
6263// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
6264// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
6265// CHECK:   ret <8 x i8> [[VQSHLU_N]]
6266int8x8_t test_vqshlu_n_s8(int8x8_t a) {
6267  return vqshlu_n_s8(a, 3);
6268}
6269
6270// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
6271// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6272// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6273// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
6274// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
6275int16x4_t test_vqshlu_n_s16(int16x4_t a) {
6276  return vqshlu_n_s16(a, 3);
6277}
6278
6279// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
6280// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6281// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6282// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
6283// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
6284int32x2_t test_vqshlu_n_s32(int32x2_t a) {
6285  return vqshlu_n_s32(a, 3);
6286}
6287
6288// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
6289// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
6290// CHECK:   ret <16 x i8> [[VQSHLU_N]]
6291int8x16_t test_vqshluq_n_s8(int8x16_t a) {
6292  return vqshluq_n_s8(a, 3);
6293}
6294
6295// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
6296// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6297// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6298// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
6299// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
6300int16x8_t test_vqshluq_n_s16(int16x8_t a) {
6301  return vqshluq_n_s16(a, 3);
6302}
6303
6304// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
6305// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6306// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6307// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
6308// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
6309int32x4_t test_vqshluq_n_s32(int32x4_t a) {
6310  return vqshluq_n_s32(a, 3);
6311}
6312
6313// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
6314// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6315// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6316// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
6317// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
6318int64x2_t test_vqshluq_n_s64(int64x2_t a) {
6319  return vqshluq_n_s64(a, 3);
6320}
6321
6322// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
6323// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6324// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6325// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6326// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
6327// CHECK:   ret <8 x i8> [[VSHRN_N]]
// vshrn_n_s16: CHECK above expects ashr by splat(3) then trunc to <8 x i8>.
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}
6331
6332// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
6333// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6334// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6335// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
6336// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
6337// CHECK:   ret <4 x i16> [[VSHRN_N]]
// vshrn_n_s32: CHECK above expects ashr by splat(9) then trunc to <4 x i16>.
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}
6341
6342// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
6343// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6344// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6345// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
6346// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
6347// CHECK:   ret <2 x i32> [[VSHRN_N]]
// vshrn_n_s64: CHECK above expects ashr by splat(19) then trunc to <2 x i32>.
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}
6351
6352// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
6353// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6354// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6355// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6356// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
6357// CHECK:   ret <8 x i8> [[VSHRN_N]]
// vshrn_n_u16: CHECK above expects lshr by splat(3) then trunc to <8 x i8>.
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}
6361
6362// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
6363// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6364// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6365// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
6366// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
6367// CHECK:   ret <4 x i16> [[VSHRN_N]]
// vshrn_n_u32: CHECK above expects lshr by splat(9) then trunc to <4 x i16>.
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}
6371
6372// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
6373// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6374// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6375// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
6376// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
6377// CHECK:   ret <2 x i32> [[VSHRN_N]]
// vshrn_n_u64: CHECK above expects lshr by splat(19) then trunc to <2 x i32>.
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
6381
6382// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
6383// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6384// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6385// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6386// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
6387// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6388// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// vshrn_high_n_s16: CHECK above expects ashr/trunc of %b, then a
// shufflevector concatenating %a with the narrowed result.
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}
6392
6393// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
6394// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6395// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6396// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
6397// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
6398// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6399// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// vshrn_high_n_s32: CHECK above expects ashr/trunc of %b, then a
// shufflevector concatenating %a with the narrowed result.
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}
6403
6404// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
6405// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6406// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6407// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
6408// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
6409// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6410// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// vshrn_high_n_s64: CHECK above expects ashr/trunc of %b, then a
// shufflevector concatenating %a with the narrowed result.
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}
6414
// vshrn_high_n_u16: unsigned variant uses lshr (not ashr) by 3, then trunc
// to i8 and concatenation with %a via shufflevector.
// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}
6425
// vshrn_high_n_u32: unsigned lshr by 9 then trunc to i16; narrowed half
// appended to %a via shufflevector.
// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}
6436
// vshrn_high_n_u64: unsigned lshr by 19 then trunc to i32; narrowed half
// appended to %a via shufflevector.
// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
6447
// vqshrun_n_s16: signed saturating shift-right unsigned-narrow; lowers to the
// target intrinsic llvm.aarch64.neon.sqshrun with immediate 3.
// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
int8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}
6456
// vqshrun_n_s32: saturating shift-right unsigned-narrow via
// llvm.aarch64.neon.sqshrun.v4i16 with immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
int16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}
6465
// vqshrun_n_s64: saturating shift-right unsigned-narrow via
// llvm.aarch64.neon.sqshrun.v2i32 with immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
int32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}
6474
// vqshrun_high_n_s16: sqshrun intrinsic call whose result is concatenated
// onto %a (low half) with a shufflevector to build the 128-bit result.
// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}
6484
// vqshrun_high_n_s32: sqshrun.v4i16 with immediate 9, result concatenated
// onto %a via shufflevector.
// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}
6494
// vqshrun_high_n_s64: sqshrun.v2i32 with immediate 19, result concatenated
// onto %a via shufflevector.
// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
6504
// vrshrn_n_s16: rounding shift right narrow; lowers to
// llvm.aarch64.neon.rshrn (rounding is sign-agnostic, so one intrinsic serves both).
// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}
6513
// vrshrn_n_s32: rounding shift right narrow via rshrn.v4i16, immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}
6522
// vrshrn_n_s64: rounding shift right narrow via rshrn.v2i32, immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}
6531
// vrshrn_n_u16: unsigned variant maps to the same rshrn.v8i8 intrinsic as
// the signed form — the expected IR is identical to test_vrshrn_n_s16.
// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}
6540
// vrshrn_n_u32: unsigned variant, same rshrn.v4i16 lowering as the signed form.
// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}
6549
// vrshrn_n_u64: unsigned variant, same rshrn.v2i32 lowering as the signed form.
// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}
6558
// vrshrn_high_n_s16: rshrn intrinsic result concatenated onto %a (low half)
// with shufflevector to form the 128-bit result.
// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}
6568
// vrshrn_high_n_s32: rshrn.v4i16 with immediate 9, concatenated onto %a.
// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}
6578
// vrshrn_high_n_s64: rshrn.v2i32 with immediate 19, concatenated onto %a.
// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}
6588
// vrshrn_high_n_u16: unsigned variant; identical rshrn.v8i8 + shufflevector
// lowering to the signed form.
// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}
6598
// vrshrn_high_n_u32: unsigned variant; rshrn.v4i16 + shufflevector.
// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}
6608
// vrshrn_high_n_u64: unsigned variant; rshrn.v2i32 + shufflevector.
// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
6618
// vqrshrun_n_s16: saturating *rounding* shift-right unsigned-narrow; lowers
// to llvm.aarch64.neon.sqrshrun with immediate 3.
// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}
6627
// vqrshrun_n_s32: sqrshrun.v4i16 with immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}
6636
// vqrshrun_n_s64: sqrshrun.v2i32 with immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}
6645
// vqrshrun_high_n_s16: sqrshrun intrinsic, result concatenated onto %a via
// shufflevector.
// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}
6655
// vqrshrun_high_n_s32: sqrshrun.v4i16 + shufflevector concat with %a.
// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}
6665
// vqrshrun_high_n_s64: sqrshrun.v2i32 + shufflevector concat with %a.
// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
6675
// vqshrn_n_s16: signed saturating shift right narrow; lowers to
// llvm.aarch64.neon.sqshrn with immediate 3.
// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}
6684
// vqshrn_n_s32: sqshrn.v4i16 with immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}
6693
// vqshrn_n_s64: sqshrn.v2i32 with immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}
6702
// vqshrn_n_u16: unsigned saturating shift right narrow; note the distinct
// uqshrn intrinsic (unlike rshrn, saturation is signedness-dependent).
// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}
6711
// vqshrn_n_u32: uqshrn.v4i16 with immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}
6720
// vqshrn_n_u64: uqshrn.v2i32 with immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}
6729
// vqshrn_high_n_s16: sqshrn intrinsic result concatenated onto %a via
// shufflevector.
// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}
6739
// vqshrn_high_n_s32: sqshrn.v4i16 + shufflevector concat with %a.
// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}
6749
// vqshrn_high_n_s64: sqshrn.v2i32 + shufflevector concat with %a.
// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}
6759
// vqshrn_high_n_u16: unsigned uqshrn intrinsic + shufflevector concat with %a.
// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}
6769
// vqshrn_high_n_u32: uqshrn.v4i16 + shufflevector concat with %a.
// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}
6779
// vqshrn_high_n_u64: uqshrn.v2i32 + shufflevector concat with %a.
// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}
6789
// vqrshrn_n_s16: signed saturating rounding shift right narrow; lowers to
// llvm.aarch64.neon.sqrshrn with immediate 3.
// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}
6798
// vqrshrn_n_s32: sqrshrn.v4i16 with immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}
6807
// vqrshrn_n_s64: sqrshrn.v2i32 with immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}
6816
// vqrshrn_n_u16: unsigned saturating rounding shift right narrow; uses the
// distinct uqrshrn intrinsic.
// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}
6825
// vqrshrn_n_u32: uqrshrn.v4i16 with immediate 9.
// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}
6834
// vqrshrn_n_u64: uqrshrn.v2i32 with immediate 19.
// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}
6843
// vqrshrn_high_n_s16: sqrshrn intrinsic result concatenated onto %a via
// shufflevector.
// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}
6853
// vqrshrn_high_n_s32: sqrshrn.v4i16 + shufflevector concat with %a.
// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}
6863
// vqrshrn_high_n_s64: sqrshrn.v2i32 + shufflevector concat with %a.
// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}
6873
// vqrshrn_high_n_u16: unsigned uqrshrn intrinsic + shufflevector concat with %a.
// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}
6883
// vqrshrn_high_n_u32: uqrshrn.v4i16 + shufflevector concat with %a.
// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}
6893
// vqrshrn_high_n_u64: uqrshrn.v2i32 + shufflevector concat with %a.
// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}
6903
// vshll_n_s8: shift left long; lowered to plain IR — sext to double-width
// lanes, then shl by 3 (no target intrinsic needed).
// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}
6911
// vshll_n_s16: sext to i32 lanes then shl by 9 (the round-trip bitcasts come
// from the generic NEON argument handling).
// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}
6921
// vshll_n_s32: sext to i64 lanes then shl by 19.
// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}
6931
// vshll_n_u8: unsigned variant uses zext (not sext) before the shl by 3.
// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}
6939
// vshll_n_u16: zext to i32 lanes then shl by 9.
// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}
6949
// vshll_n_u32: zext to i64 lanes then shl by 19.
// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}
6959
// vshll_high_n_s8: extracts the high 8 lanes with a shufflevector first,
// then widens with sext and shifts left by 3.
// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}
6968
6969// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
6970// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6971// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6972// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6973// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6974// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6975// CHECK:   ret <4 x i32> [[VSHLL_N]]
6976int32x4_t test_vshll_high_n_s16(int16x8_t a) {
6977  return vshll_high_n_s16(a, 9);
6978}
6979
6980// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
6981// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6982// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6983// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6984// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6985// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6986// CHECK:   ret <2 x i64> [[VSHLL_N]]
6987int64x2_t test_vshll_high_n_s32(int32x4_t a) {
6988  return vshll_high_n_s32(a, 19);
6989}
6990
6991// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
6992// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6993// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6994// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6995// CHECK:   ret <8 x i16> [[VSHLL_N]]
6996uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
6997  return vshll_high_n_u8(a, 3);
6998}
6999
7000// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
7001// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7002// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
7003// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7004// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7005// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
7006// CHECK:   ret <4 x i32> [[VSHLL_N]]
7007uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
7008  return vshll_high_n_u16(a, 9);
7009}
7010
7011// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
7012// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7013// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
7014// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7015// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7016// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
7017// CHECK:   ret <2 x i64> [[VSHLL_N]]
7018uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
7019  return vshll_high_n_u32(a, 19);
7020}
7021
7022// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
7023// CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7024// CHECK:   ret <8 x i16> [[VMOVL_I]]
7025int16x8_t test_vmovl_s8(int8x8_t a) {
7026  return vmovl_s8(a);
7027}
7028
7029// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
7030// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7031// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7032// CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7033// CHECK:   ret <4 x i32> [[VMOVL_I]]
7034int32x4_t test_vmovl_s16(int16x4_t a) {
7035  return vmovl_s16(a);
7036}
7037
7038// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
7039// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7040// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7041// CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7042// CHECK:   ret <2 x i64> [[VMOVL_I]]
7043int64x2_t test_vmovl_s32(int32x2_t a) {
7044  return vmovl_s32(a);
7045}
7046
7047// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
7048// CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7049// CHECK:   ret <8 x i16> [[VMOVL_I]]
7050uint16x8_t test_vmovl_u8(uint8x8_t a) {
7051  return vmovl_u8(a);
7052}
7053
7054// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
7055// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7056// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7057// CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7058// CHECK:   ret <4 x i32> [[VMOVL_I]]
7059uint32x4_t test_vmovl_u16(uint16x4_t a) {
7060  return vmovl_u16(a);
7061}
7062
7063// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
7064// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7065// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7066// CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7067// CHECK:   ret <2 x i64> [[VMOVL_I]]
7068uint64x2_t test_vmovl_u32(uint32x2_t a) {
7069  return vmovl_u32(a);
7070}
7071
7072// CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 {
7073// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7074// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
7075// CHECK:   ret <8 x i16> [[TMP0]]
7076int16x8_t test_vmovl_high_s8(int8x16_t a) {
7077  return vmovl_high_s8(a);
7078}
7079
7080// CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 {
7081// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7082// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7083// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7084// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7085// CHECK:   ret <4 x i32> [[TMP2]]
7086int32x4_t test_vmovl_high_s16(int16x8_t a) {
7087  return vmovl_high_s16(a);
7088}
7089
7090// CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 {
7091// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7092// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7093// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7094// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7095// CHECK:   ret <2 x i64> [[TMP2]]
7096int64x2_t test_vmovl_high_s32(int32x4_t a) {
7097  return vmovl_high_s32(a);
7098}
7099
7100// CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 {
7101// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7102// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
7103// CHECK:   ret <8 x i16> [[TMP0]]
7104uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
7105  return vmovl_high_u8(a);
7106}
7107
7108// CHECK-LABEL: define <4 x i32> @test_vmovl_high_u16(<8 x i16> %a) #0 {
7109// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7110// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7111// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7112// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7113// CHECK:   ret <4 x i32> [[TMP2]]
7114uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
7115  return vmovl_high_u16(a);
7116}
7117
7118// CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 {
7119// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7120// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7121// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7122// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7123// CHECK:   ret <2 x i64> [[TMP2]]
7124uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
7125  return vmovl_high_u32(a);
7126}
7127
7128// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
7129// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7130// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7131// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
7132// CHECK:   ret <2 x float> [[VCVT_N1]]
7133float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
7134  return vcvt_n_f32_s32(a, 31);
7135}
7136
7137// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
7138// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7139// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7140// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
7141// CHECK:   ret <4 x float> [[VCVT_N1]]
7142float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
7143  return vcvtq_n_f32_s32(a, 31);
7144}
7145
7146// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 {
7147// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7148// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7149// CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
7150// CHECK:   ret <2 x double> [[VCVT_N1]]
7151float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
7152  return vcvtq_n_f64_s64(a, 50);
7153}
7154
7155// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
7156// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7157// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7158// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
7159// CHECK:   ret <2 x float> [[VCVT_N1]]
7160float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
7161  return vcvt_n_f32_u32(a, 31);
7162}
7163
7164// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
7165// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7166// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7167// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
7168// CHECK:   ret <4 x float> [[VCVT_N1]]
7169float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
7170  return vcvtq_n_f32_u32(a, 31);
7171}
7172
7173// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) #0 {
7174// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7175// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7176// CHECK:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
7177// CHECK:   ret <2 x double> [[VCVT_N1]]
7178float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
7179  return vcvtq_n_f64_u64(a, 50);
7180}
7181
7182// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
7183// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
7184// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7185// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
7186// CHECK:   ret <2 x i32> [[VCVT_N1]]
7187int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
7188  return vcvt_n_s32_f32(a, 31);
7189}
7190
7191// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
7192// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
7193// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
7194// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
7195// CHECK:   ret <4 x i32> [[VCVT_N1]]
7196int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
7197  return vcvtq_n_s32_f32(a, 31);
7198}
7199
7200// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 {
7201// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
7202// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
7203// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
7204// CHECK:   ret <2 x i64> [[VCVT_N1]]
7205int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
7206  return vcvtq_n_s64_f64(a, 50);
7207}
7208
7209// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
7210// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
7211// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
7212// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
7213// CHECK:   ret <2 x i32> [[VCVT_N1]]
7214uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
7215  return vcvt_n_u32_f32(a, 31);
7216}
7217
7218// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
7219// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
7220// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
7221// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
7222// CHECK:   ret <4 x i32> [[VCVT_N1]]
7223uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
7224  return vcvtq_n_u32_f32(a, 31);
7225}
7226
7227// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 {
7228// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
7229// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
7230// CHECK:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
7231// CHECK:   ret <2 x i64> [[VCVT_N1]]
7232uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
7233  return vcvtq_n_u64_f64(a, 50);
7234}
7235
7236// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
7237// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7238// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7239// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7240// CHECK:   ret <8 x i16> [[ADD_I]]
7241int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
7242  return vaddl_s8(a, b);
7243}
7244
7245// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
7246// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7247// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7248// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7249// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7250// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7251// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
7252// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7253// CHECK:   ret <4 x i32> [[ADD_I]]
7254int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
7255  return vaddl_s16(a, b);
7256}
7257
7258// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
7259// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7260// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7261// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7262// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7263// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7264// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
7265// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7266// CHECK:   ret <2 x i64> [[ADD_I]]
7267int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
7268  return vaddl_s32(a, b);
7269}
7270
7271// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
7272// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7273// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7274// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7275// CHECK:   ret <8 x i16> [[ADD_I]]
7276uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
7277  return vaddl_u8(a, b);
7278}
7279
7280// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
7281// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7282// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7283// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7284// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7285// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7286// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
7287// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7288// CHECK:   ret <4 x i32> [[ADD_I]]
7289uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
7290  return vaddl_u16(a, b);
7291}
7292
7293// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
7294// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7295// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7296// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7297// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7298// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7299// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
7300// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7301// CHECK:   ret <2 x i64> [[ADD_I]]
7302uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
7303  return vaddl_u32(a, b);
7304}
7305
7306// CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
7307// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7308// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7309// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7310// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7311// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
7312// CHECK:   ret <8 x i16> [[ADD_I]]
7313int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
7314  return vaddl_high_s8(a, b);
7315}
7316
7317// CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
7318// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7319// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7320// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7321// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7322// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7323// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7324// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7325// CHECK:   [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
7326// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
7327// CHECK:   ret <4 x i32> [[ADD_I]]
7328int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
7329  return vaddl_high_s16(a, b);
7330}
7331
7332// CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
7333// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7334// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7335// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7336// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7337// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7338// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7339// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7340// CHECK:   [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
7341// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
7342// CHECK:   ret <2 x i64> [[ADD_I]]
7343int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
7344  return vaddl_high_s32(a, b);
7345}
7346
7347// CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
7348// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7349// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7350// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7351// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7352// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
7353// CHECK:   ret <8 x i16> [[ADD_I]]
7354uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
7355  return vaddl_high_u8(a, b);
7356}
7357
7358// CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
7359// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7360// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7361// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7362// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7363// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7364// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7365// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7366// CHECK:   [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
7367// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
7368// CHECK:   ret <4 x i32> [[ADD_I]]
7369uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
7370  return vaddl_high_u16(a, b);
7371}
7372
7373// CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
7374// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7375// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7376// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7377// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7378// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7379// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7380// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7381// CHECK:   [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
7382// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
7383// CHECK:   ret <2 x i64> [[ADD_I]]
7384uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
7385  return vaddl_high_u32(a, b);
7386}
7387
7388// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
7389// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7390// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
7391// CHECK:   ret <8 x i16> [[ADD_I]]
7392int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
7393  return vaddw_s8(a, b);
7394}
7395
7396// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
7397// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7398// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7399// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7400// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
7401// CHECK:   ret <4 x i32> [[ADD_I]]
7402int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
7403  return vaddw_s16(a, b);
7404}
7405
7406// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
7407// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7408// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7409// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7410// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
7411// CHECK:   ret <2 x i64> [[ADD_I]]
7412int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
7413  return vaddw_s32(a, b);
7414}
7415
7416// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
7417// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7418// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
7419// CHECK:   ret <8 x i16> [[ADD_I]]
7420uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
7421  return vaddw_u8(a, b);
7422}
7423
7424// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
7425// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7426// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7427// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7428// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
7429// CHECK:   ret <4 x i32> [[ADD_I]]
7430uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
7431  return vaddw_u16(a, b);
7432}
7433
7434// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
7435// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7436// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7437// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7438// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
7439// CHECK:   ret <2 x i64> [[ADD_I]]
7440uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
7441  return vaddw_u32(a, b);
7442}
7443
7444// CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
7445// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7446// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7447// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
7448// CHECK:   ret <8 x i16> [[ADD_I]]
7449int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
7450  return vaddw_high_s8(a, b);
7451}
7452
7453// CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
7454// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7455// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7456// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7457// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7458// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
7459// CHECK:   ret <4 x i32> [[ADD_I]]
7460int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
7461  return vaddw_high_s16(a, b);
7462}
7463
7464// CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
7465// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7466// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7467// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7468// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7469// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
7470// CHECK:   ret <2 x i64> [[ADD_I]]
7471int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
7472  return vaddw_high_s32(a, b);
7473}
7474
7475// CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
7476// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7477// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7478// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
7479// CHECK:   ret <8 x i16> [[ADD_I]]
7480uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
7481  return vaddw_high_u8(a, b);
7482}
7483
7484// CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
7485// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7486// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7487// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7488// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7489// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
7490// CHECK:   ret <4 x i32> [[ADD_I]]
7491uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
7492  return vaddw_high_u16(a, b);
7493}
7494
7495// CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
7496// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7497// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7498// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7499// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7500// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
7501// CHECK:   ret <2 x i64> [[ADD_I]]
7502uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
7503  return vaddw_high_u32(a, b);
7504}
7505
7506// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
7507// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
7508// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7509// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7510// CHECK:   ret <8 x i16> [[SUB_I]]
7511int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
7512  return vsubl_s8(a, b);
7513}
7514
7515// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
7516// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7517// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7518// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7519// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7520// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7521// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
7522// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7523// CHECK:   ret <4 x i32> [[SUB_I]]
7524int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
7525  return vsubl_s16(a, b);
7526}
7527
7528// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
7529// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7530// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7531// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7532// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7533// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7534// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
7535// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7536// CHECK:   ret <2 x i64> [[SUB_I]]
7537int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
7538  return vsubl_s32(a, b);
7539}
7540
7541// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
7542// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
7543// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7544// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7545// CHECK:   ret <8 x i16> [[SUB_I]]
// Unsigned widening subtract: both u8x8 operands are zero-extended to i16x8 before the sub (per the FileCheck lines above).
7546uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
7547  return vsubl_u8(a, b);
7548}
7549
7550// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
7551// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7552// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7553// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7554// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7555// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
7556// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
7557// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7558// CHECK:   ret <4 x i32> [[SUB_I]]
// Unsigned widening subtract: both u16x4 operands are zero-extended to i32x4 before the sub (per the FileCheck lines above).
7559uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
7560  return vsubl_u16(a, b);
7561}
7562
7563// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
7564// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7565// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7566// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7567// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7568// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
7569// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
7570// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
7571// CHECK:   ret <2 x i64> [[SUB_I]]
// Unsigned widening subtract: both u32x2 operands are zero-extended to i64x2 before the sub (per the FileCheck lines above).
7572uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
7573  return vsubl_u32(a, b);
7574}
7575
7576// CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
7577// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7578// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7579// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7580// CHECK:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7581// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
7582// CHECK:   ret <8 x i16> [[SUB_I]]
// High-half widening subtract: lanes 8-15 of each 128-bit operand are extracted (shufflevector), sign-extended, then subtracted.
7583int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
7584  return vsubl_high_s8(a, b);
7585}
7586
7587// CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
7588// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7589// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7590// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7591// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7592// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7593// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7594// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7595// CHECK:   [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
7596// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
7597// CHECK:   ret <4 x i32> [[SUB_I]]
// High-half widening subtract: lanes 4-7 of each operand are extracted, sign-extended to i32x4, then subtracted.
7598int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
7599  return vsubl_high_s16(a, b);
7600}
7601
7602// CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
7603// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7604// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7605// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7606// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7607// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7608// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7609// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7610// CHECK:   [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
7611// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
7612// CHECK:   ret <2 x i64> [[SUB_I]]
// High-half widening subtract: lanes 2-3 of each operand are extracted, sign-extended to i64x2, then subtracted.
7613int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
7614  return vsubl_high_s32(a, b);
7615}
7616
7617// CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
7618// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7619// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7620// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7621// CHECK:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
7622// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
7623// CHECK:   ret <8 x i16> [[SUB_I]]
// High-half widening subtract (unsigned): lanes 8-15 extracted, zero-extended to i16x8, then subtracted.
7624uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
7625  return vsubl_high_u8(a, b);
7626}
7627
7628// CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
7629// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7630// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7631// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7632// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7633// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7634// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
7635// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
7636// CHECK:   [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
7637// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
7638// CHECK:   ret <4 x i32> [[SUB_I]]
// High-half widening subtract (unsigned): lanes 4-7 extracted, zero-extended to i32x4, then subtracted.
7639uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
7640  return vsubl_high_u16(a, b);
7641}
7642
7643// CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
7644// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7645// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7646// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7647// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7648// CHECK:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7649// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
7650// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
7651// CHECK:   [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
7652// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
7653// CHECK:   ret <2 x i64> [[SUB_I]]
// High-half widening subtract (unsigned): lanes 2-3 extracted, zero-extended to i64x2, then subtracted.
7654uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
7655  return vsubl_high_u32(a, b);
7656}
7657
7658// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
7659// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
7660// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
7661// CHECK:   ret <8 x i16> [[SUB_I]]
// Wide subtract: only the narrow operand %b is sign-extended to i16x8; %a is already wide.
7662int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
7663  return vsubw_s8(a, b);
7664}
7665
7666// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
7667// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7668// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7669// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7670// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
7671// CHECK:   ret <4 x i32> [[SUB_I]]
// Wide subtract: only the narrow operand %b is sign-extended to i32x4; %a is already wide.
7672int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
7673  return vsubw_s16(a, b);
7674}
7675
7676// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
7677// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7678// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7679// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7680// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
7681// CHECK:   ret <2 x i64> [[SUB_I]]
// Wide subtract: only the narrow operand %b is sign-extended to i64x2; %a is already wide.
7682int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
7683  return vsubw_s32(a, b);
7684}
7685
7686// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
7687// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
7688// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
7689// CHECK:   ret <8 x i16> [[SUB_I]]
// Wide subtract (unsigned): only %b is zero-extended to i16x8 before subtracting from %a.
7690uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
7691  return vsubw_u8(a, b);
7692}
7693
7694// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
7695// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7696// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7697// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7698// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
7699// CHECK:   ret <4 x i32> [[SUB_I]]
// Wide subtract (unsigned): only %b is zero-extended to i32x4 before subtracting from %a.
7700uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
7701  return vsubw_u16(a, b);
7702}
7703
7704// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
7705// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7706// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7707// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7708// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
7709// CHECK:   ret <2 x i64> [[SUB_I]]
// Wide subtract (unsigned): only %b is zero-extended to i64x2 before subtracting from %a.
7710uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
7711  return vsubw_u32(a, b);
7712}
7713
7714// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
7715// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7716// CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7717// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7718// CHECK:   ret <8 x i16> [[SUB_I]]
// High-half wide subtract: lanes 8-15 of %b are extracted, sign-extended, then subtracted from the wide %a.
7719int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
7720  return vsubw_high_s8(a, b);
7721}
7722
7723// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
7724// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7725// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7726// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7727// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
7728// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
7729// CHECK:   ret <4 x i32> [[SUB_I]]
// High-half wide subtract: lanes 4-7 of %b are extracted, sign-extended to i32x4, then subtracted from %a.
7730int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
7731  return vsubw_high_s16(a, b);
7732}
7733
7734// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
7735// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7736// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7737// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7738// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
7739// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
7740// CHECK:   ret <2 x i64> [[SUB_I]]
// High-half wide subtract: lanes 2-3 of %b are extracted, sign-extended to i64x2, then subtracted from %a.
7741int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
7742  return vsubw_high_s32(a, b);
7743}
7744
7745// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
7746// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7747// CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7748// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7749// CHECK:   ret <8 x i16> [[SUB_I]]
// High-half wide subtract (unsigned): lanes 8-15 of %b extracted, zero-extended, then subtracted from %a.
7750uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
7751  return vsubw_high_u8(a, b);
7752}
7753
7754// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
7755// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7756// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7757// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
7758// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
7759// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
7760// CHECK:   ret <4 x i32> [[SUB_I]]
// High-half wide subtract (unsigned): lanes 4-7 of %b extracted, zero-extended to i32x4, then subtracted from %a.
7761uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
7762  return vsubw_high_u16(a, b);
7763}
7764
7765// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
7766// CHECK:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7767// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7768// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
7769// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
7770// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
7771// CHECK:   ret <2 x i64> [[SUB_I]]
// High-half wide subtract (unsigned): lanes 2-3 of %b extracted, zero-extended to i64x2, then subtracted from %a.
7772uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
7773  return vsubw_high_u32(a, b);
7774}
7775
7776// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
7777// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7778// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7779// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
7780// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
7781// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
7782// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7783// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7784// CHECK:   ret <8 x i8> [[VADDHN2_I]]
// Add and narrow to high half: add, lshr by 8 (half the element width), then trunc to i8x8 — open-coded IR, no intrinsic call.
7785int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
7786  return vaddhn_s16(a, b);
7787}
7788
7789// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
7790// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7791// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7792// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7793// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
7794// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
7795// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7796// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7797// CHECK:   ret <4 x i16> [[VADDHN2_I]]
// Add and narrow to high half: add, lshr by 16, then trunc to i16x4.
7798int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
7799  return vaddhn_s32(a, b);
7800}
7801
7802// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
7803// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7804// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7805// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7806// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
7807// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
7808// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7809// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7810// CHECK:   ret <2 x i32> [[VADDHN2_I]]
// Add and narrow to high half: add, lshr by 32, then trunc to i32x2.
7811int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
7812  return vaddhn_s64(a, b);
7813}
7814
7815// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
7816// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7817// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7818// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
7819// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
7820// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
7821// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7822// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7823// CHECK:   ret <8 x i8> [[VADDHN2_I]]
// Unsigned variant lowers to the same add + lshr 8 + trunc IR as the signed form (the high half is signedness-independent here).
7824uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
7825  return vaddhn_u16(a, b);
7826}
7827
7828// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
7829// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7830// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7831// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7832// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
7833// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
7834// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7835// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7836// CHECK:   ret <4 x i16> [[VADDHN2_I]]
// Unsigned variant: same add + lshr 16 + trunc lowering as vaddhn_s32.
7837uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
7838  return vaddhn_u32(a, b);
7839}
7840
7841// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
7842// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7843// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7844// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7845// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
7846// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
7847// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7848// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7849// CHECK:   ret <2 x i32> [[VADDHN2_I]]
// Unsigned variant: same add + lshr 32 + trunc lowering as vaddhn_s64.
7850uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
7851  return vaddhn_u64(a, b);
7852}
7853
7854// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
7855// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7856// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7857// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
7858// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
7859// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
7860// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7861// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7862// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7863// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
// vaddhn + concatenate: the narrowed i8x8 result is joined onto %r with a shufflevector to build the 16-lane return value.
7864int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
7865  return vaddhn_high_s16(r, a, b);
7866}
7867
7868// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
7869// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7870// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7871// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7872// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
7873// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
7874// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7875// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7876// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7877// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
// vaddhn + concatenate: narrowed i16x4 result is joined onto %r via shufflevector into an 8-lane vector.
7878int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
7879  return vaddhn_high_s32(r, a, b);
7880}
7881
7882// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
7883// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7884// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7885// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7886// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
7887// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
7888// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7889// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7890// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7891// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
// vaddhn + concatenate: narrowed i32x2 result is joined onto %r via shufflevector into a 4-lane vector.
7892int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
7893  return vaddhn_high_s64(r, a, b);
7894}
7895
7896// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
7897// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7898// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7899// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
7900// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
7901// CHECK:   [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
7902// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7903// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7904// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7905// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
// Unsigned vaddhn_high: identical IR shape to the signed form — add, lshr 8, trunc, then shufflevector onto %r.
7906uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
7907  return vaddhn_high_u16(r, a, b);
7908}
7909
7910// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
7911// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7912// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7913// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7914// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
7915// CHECK:   [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
7916// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7917// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7918// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7919// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
// Unsigned vaddhn_high: add, lshr 16, trunc, then shufflevector onto %r.
7920uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
7921  return vaddhn_high_u32(r, a, b);
7922}
7923
7924// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
7925// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7926// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7927// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7928// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
7929// CHECK:   [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
7930// CHECK:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7931// CHECK:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7932// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7933// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
// Unsigned vaddhn_high: add, lshr 32, trunc, then shufflevector onto %r.
7934uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
7935  return vaddhn_high_u64(r, a, b);
7936}
7937
7938// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
7939// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7940// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7941// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
7942// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
7943// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
7944// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
// Rounding add-and-narrow: lowered to a call to llvm.aarch64.neon.raddhn.v8i8 rather than open-coded add/shift/trunc.
7945int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
7946  return vraddhn_s16(a, b);
7947}
7948
7949// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
7950// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7951// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7952// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7953// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
7954// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
7955// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7956// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
7957// CHECK:   ret <4 x i16> [[TMP2]]
// Rounding add-and-narrow via llvm.aarch64.neon.raddhn.v4i16; result round-trips through an i8x8 bitcast.
7958int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
7959  return vraddhn_s32(a, b);
7960}
7961
7962// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
7963// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7964// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7965// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
7966// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
7967// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
7968// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7969// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
7970// CHECK:   ret <2 x i32> [[TMP2]]
// Rounding add-and-narrow via llvm.aarch64.neon.raddhn.v2i32.
7971int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
7972  return vraddhn_s64(a, b);
7973}
7974
7975// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
7976// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7977// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7978// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
7979// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
7980// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
7981// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
// Unsigned vraddhn lowers to the same raddhn.v8i8 intrinsic call as the signed form.
7982uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
7983  return vraddhn_u16(a, b);
7984}
7985
7986// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
7987// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7988// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7989// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
7990// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
7991// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
7992// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7993// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
7994// CHECK:   ret <4 x i16> [[TMP2]]
// Unsigned vraddhn lowers to the same raddhn.v4i16 intrinsic call as the signed form.
7995uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
7996  return vraddhn_u32(a, b);
7997}
7998
7999// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
8000// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8001// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
8002// CHECK:   [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
8003// CHECK:   [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
8004// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
8005// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
8006// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
8007// CHECK:   ret <2 x i32> [[TMP2]]
// Unsigned vraddhn lowers to the same raddhn.v2i32 intrinsic call as the signed form.
8008uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
8009  return vraddhn_u64(a, b);
8010}
8011
8012// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
8013// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8014// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8015// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8016// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
8017// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
8018// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8019// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
// vraddhn + concatenate: the raddhn.v8i8 result is joined onto %r with a shufflevector to build the 16-lane return value.
8020int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
8021  return vraddhn_high_s16(r, a, b);
8022}
8023
8024// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
8025// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8026// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
8027// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
8028// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
8029// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
8030// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
8031// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
8032// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8033// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
// vraddhn + concatenate: raddhn.v4i16 result joined onto %r via shufflevector into an 8-lane vector.
8034int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
8035  return vraddhn_high_s32(r, a, b);
8036}
8037
8038// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
8039// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8040// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
8041// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
8042// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
8043// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
8044// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
8045// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
8046// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8047// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
// vraddhn + concatenate: raddhn.v2i32 result joined onto %r via shufflevector into a 4-lane vector.
8048int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
8049  return vraddhn_high_s64(r, a, b);
8050}
8051
8052// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
8053// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
8054// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
8055// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
8056// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
8057// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
8058// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8059// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
// Unsigned vraddhn_high: same raddhn.v8i8 call + shufflevector concat onto %r as the signed form.
8060uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
8061  return vraddhn_high_u16(r, a, b);
8062}
8063
8064// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
8065// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8066// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
8067// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
8068// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
8069// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
8070// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
8071// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
8072// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
8073// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
8074uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
8075  return vraddhn_high_u32(r, a, b);
8076}
8077
8078// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
8079// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8080// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
8081// CHECK:   [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
8082// CHECK:   [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
8083// CHECK:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
8084// CHECK:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
8085// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
8086// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
8087// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
8088uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
8089  return vraddhn_high_u64(r, a, b);
8090}
8091
// vsubhn tests (s16..u64): subtract-high-narrow lowers to plain IR (no
// target intrinsic) — a wide `sub`, an `lshr` by half the element width
// (8/16/32), then `trunc` to the narrow type. Signed and unsigned variants
// generate identical IR since the high-half extraction is sign-agnostic.
// The CHECK lines are FileCheck directives tracking -emit-llvm | mem2reg
// output exactly; keep them in sync with codegen, not hand-edited.
// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
8169
// vsubhn_high tests: same sub/lshr/trunc narrowing as vsubhn, followed by a
// shufflevector that concatenates the narrowed difference into the high half
// of %r. CHECK lines are FileCheck directives pinned to -emit-llvm | mem2reg
// output — regenerate rather than hand-edit if codegen changes.
// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}
8253
// vrsubhn tests: the rounding variant lowers to the target intrinsic
// llvm.aarch64.neon.rsubhn (unlike vsubhn, which is expressed as plain
// sub/lshr/trunc IR above). Signed and unsigned variants call the same
// intrinsic. CHECK lines are FileCheck directives matching codegen exactly.
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
8327
// vrsubhn_high tests: llvm.aarch64.neon.rsubhn produces the narrowed result,
// which a shufflevector concatenates into the high half of %r. CHECK lines
// are FileCheck directives pinned to -emit-llvm | mem2reg output.
// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}
8407
// vabdl tests: absolute-difference-long lowers to the sabd (signed) or uabd
// (unsigned) intrinsic followed by a zext widening. Note the signed variants
// also widen with zext — the absolute difference is non-negative, so zero
// extension is correct for both. CHECK lines are FileCheck directives.
// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
8474
8475// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8476// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
8477// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
8478// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
8479// CHECK:   ret <8 x i16> [[ADD_I]]
8480int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
8481  return vabal_s8(a, b, c);
8482}
8483// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8484// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8485// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8486// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8487// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8488// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
8489// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
8490// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8491// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8492// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
8493// CHECK:   ret <4 x i32> [[ADD_I]]
8494int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
8495  return vabal_s16(a, b, c);
8496}
8497// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8498// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8499// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8500// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8501// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8502// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8503// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8504// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8505// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8506// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
8507// CHECK:   ret <2 x i64> [[ADD_I]]
8508int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
8509  return vabal_s32(a, b, c);
8510}
8511// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8512// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
8513// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
8514// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
8515// CHECK:   ret <8 x i16> [[ADD_I]]
8516uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
8517  return vabal_u8(a, b, c);
8518}
8519// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8520// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8521// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8522// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8523// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8524// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
8525// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
8526// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8527// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8528// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
8529// CHECK:   ret <4 x i32> [[ADD_I]]
8530uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
8531  return vabal_u16(a, b, c);
8532}
8533// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8534// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8535// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8536// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8537// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8538// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8539// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8540// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8541// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8542// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
8543// CHECK:   ret <2 x i64> [[ADD_I]]
// vabal_u32: widening absolute-difference-accumulate; CHECK lines above pin uabd.v2i32 + zext + add.
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
8547
8548// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
8549// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8550// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8551// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8552// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
8553// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
// vabdl_high_s8: high-half extract (shufflevector) + sabd.v8i8 + zext, per the CHECK lines above.
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
8557// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
8558// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8559// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8560// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8561// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8562// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8563// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8564// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
8565// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
8566// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8567// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8568// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
// vabdl_high_s16: high-half extract + sabd.v4i16 + zext, per the CHECK lines above.
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
8572// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
8573// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8574// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8575// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8576// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8577// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8578// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8579// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8580// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8581// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8582// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8583// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
// vabdl_high_s32: high-half extract + sabd.v2i32 + zext, per the CHECK lines above.
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
8587// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
8588// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8589// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8590// CHECK:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8591// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
8592// CHECK:   ret <8 x i16> [[VMOVL_I_I_I]]
// vabdl_high_u8: high-half extract + uabd.v8i8 + zext, per the CHECK lines above.
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
8596// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
8597// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8598// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8599// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8600// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8601// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8602// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8603// CHECK:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
8604// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
8605// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8606// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8607// CHECK:   ret <4 x i32> [[VMOVL_I_I_I]]
// vabdl_high_u16: high-half extract + uabd.v4i16 + zext, per the CHECK lines above.
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
8611// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
8612// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8613// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8614// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8615// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8616// CHECK:   [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8617// CHECK:   [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8618// CHECK:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
8619// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
8620// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8621// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8622// CHECK:   ret <2 x i64> [[VMOVL_I_I_I]]
// vabdl_high_u32: high-half extract + uabd.v2i32 + zext, per the CHECK lines above.
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
8626
8627// CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8628// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8629// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8630// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8631// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
8632// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
8633// CHECK:   ret <8 x i16> [[ADD_I_I]]
// vabal_high_s8: high-half extract + sabd.v8i8 + zext + accumulate add, per the CHECK lines above.
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
8637// CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8638// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8639// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8640// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8641// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8642// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8643// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8644// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
8645// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
8646// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8647// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8648// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
8649// CHECK:   ret <4 x i32> [[ADD_I_I]]
// vabal_high_s16: high-half extract + sabd.v4i16 + zext + accumulate add, per the CHECK lines above.
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
8653// CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8654// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8655// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8656// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8657// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8658// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8659// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8660// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
8661// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
8662// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8663// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8664// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
8665// CHECK:   ret <2 x i64> [[ADD_I_I]]
// vabal_high_s32: high-half extract + sabd.v2i32 + zext + accumulate add, per the CHECK lines above.
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
8669// CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8670// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8671// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8672// CHECK:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8673// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
8674// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
8675// CHECK:   ret <8 x i16> [[ADD_I_I]]
// vabal_high_u8: high-half extract + uabd.v8i8 + zext + accumulate add, per the CHECK lines above.
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
8679// CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8680// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8681// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8682// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8683// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8684// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8685// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8686// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
8687// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
8688// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
8689// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
8690// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
8691// CHECK:   ret <4 x i32> [[ADD_I_I]]
// vabal_high_u16: high-half extract + uabd.v4i16 + zext + accumulate add, per the CHECK lines above.
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
8695// CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8696// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8697// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8698// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8699// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8700// CHECK:   [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8701// CHECK:   [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8702// CHECK:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
8703// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
8704// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
8705// CHECK:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
8706// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
8707// CHECK:   ret <2 x i64> [[ADD_I_I]]
// vabal_high_u32: high-half extract + uabd.v2i32 + zext + accumulate add, per the CHECK lines above.
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}
8711
8712// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
8713// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
8714// CHECK:   ret <8 x i16> [[VMULL_I]]
// vmull_s8: widening multiply; CHECK lines above pin a direct smull.v8i16 call.
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
8718// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
8719// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8720// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8721// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8722// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8723// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
8724// CHECK:   ret <4 x i32> [[VMULL2_I]]
// vmull_s16: widening multiply; CHECK lines above pin bitcasts + smull.v4i32.
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
8728// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
8729// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8730// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8731// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8732// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8733// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
8734// CHECK:   ret <2 x i64> [[VMULL2_I]]
// vmull_s32: widening multiply; CHECK lines above pin bitcasts + smull.v2i64.
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
8738// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
8739// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
8740// CHECK:   ret <8 x i16> [[VMULL_I]]
// vmull_u8: widening multiply; CHECK lines above pin a direct umull.v8i16 call.
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
8744// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
8745// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8746// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8747// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8748// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8749// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
8750// CHECK:   ret <4 x i32> [[VMULL2_I]]
// vmull_u16: widening multiply; CHECK lines above pin bitcasts + umull.v4i32.
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
8754// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
8755// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8756// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8757// CHECK:   [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8758// CHECK:   [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8759// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
8760// CHECK:   ret <2 x i64> [[VMULL2_I]]
// vmull_u32: widening multiply; CHECK lines above pin bitcasts + umull.v2i64.
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
8764
8765// CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
8766// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8767// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8768// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8769// CHECK:   ret <8 x i16> [[VMULL_I_I]]
// vmull_high_s8: high-half extract (shufflevector) + smull.v8i16, per the CHECK lines above.
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
8773// CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
8774// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8775// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8776// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8777// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8778// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8779// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8780// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8781// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
// vmull_high_s16: high-half extract + smull.v4i32, per the CHECK lines above.
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
8785// CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
8786// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8787// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8788// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8789// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8790// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8791// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8792// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8793// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
// vmull_high_s32: high-half extract + smull.v2i64, per the CHECK lines above.
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
8797// CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
8798// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8799// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8800// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8801// CHECK:   ret <8 x i16> [[VMULL_I_I]]
// vmull_high_u8: high-half extract + umull.v8i16, per the CHECK lines above.
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
8805// CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
8806// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8807// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8808// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8809// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8810// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8811// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8812// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8813// CHECK:   ret <4 x i32> [[VMULL2_I_I]]
// vmull_high_u16: high-half extract + umull.v4i32, per the CHECK lines above.
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
8817// CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
8818// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8819// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8820// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8821// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8822// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8823// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8824// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8825// CHECK:   ret <2 x i64> [[VMULL2_I_I]]
// vmull_high_u32: high-half extract + umull.v2i64, per the CHECK lines above.
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
8829
8830// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8831// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
8832// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8833// CHECK:   ret <8 x i16> [[ADD_I]]
// vmlal_s8: widening multiply-accumulate; CHECK lines above pin smull.v8i16 + add.
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
8837// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8838// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8839// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8840// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8841// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8842// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8843// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8844// CHECK:   ret <4 x i32> [[ADD_I]]
// vmlal_s16: widening multiply-accumulate; CHECK lines above pin smull.v4i32 + add.
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
8848// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8849// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8850// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8851// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8852// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8853// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8854// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8855// CHECK:   ret <2 x i64> [[ADD_I]]
// vmlal_s32: widening multiply-accumulate; CHECK lines above pin smull.v2i64 + add.
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
8859// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8860// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
8861// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8862// CHECK:   ret <8 x i16> [[ADD_I]]
// vmlal_u8: widening multiply-accumulate; CHECK lines above pin umull.v8i16 + add.
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
8866// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8867// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8868// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8869// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8870// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8871// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8872// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8873// CHECK:   ret <4 x i32> [[ADD_I]]
// vmlal_u16: widening multiply-accumulate; CHECK lines above pin umull.v4i32 + add.
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
8877// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8878// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8879// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8880// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8881// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8882// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8883// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8884// CHECK:   ret <2 x i64> [[ADD_I]]
// vmlal_u32: widening multiply-accumulate; CHECK lines above pin umull.v2i64 + add.
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
8888
8889// CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8890// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8891// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8892// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8893// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8894// CHECK:   ret <8 x i16> [[ADD_I_I]]
// vmlal_high_s8: high-half extract + smull.v8i16 + accumulate add, per the CHECK lines above.
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
8898// CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8899// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8900// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8901// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8902// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8903// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8904// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8905// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
8906// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8907// CHECK:   ret <4 x i32> [[ADD_I_I]]
// vmlal_high_s16: high-half extract + smull.v4i32 + accumulate add, per the CHECK lines above.
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
8911// CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8912// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8913// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8914// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8915// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8916// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8917// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8918// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
8919// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8920// CHECK:   ret <2 x i64> [[ADD_I_I]]
// vmlal_high_s32: high-half extract + smull.v2i64 + accumulate add, per the CHECK lines above.
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
8924// CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
8925// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8926// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8927// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
8928// CHECK:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8929// CHECK:   ret <8 x i16> [[ADD_I_I]]
// vmlal_high_u8: high-half extract + umull.v8i16 + accumulate add, per the CHECK lines above.
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
8933// CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
8934// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8935// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8936// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8937// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8938// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8939// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8940// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
8941// CHECK:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8942// CHECK:   ret <4 x i32> [[ADD_I_I]]
// vmlal_high_u16: high-half extract + umull.v4i32 + accumulate add, per the CHECK lines above.
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
8946// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
8947// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8948// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8949// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8950// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8951// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8952// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8953// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
8954// CHECK:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8955// CHECK:   ret <2 x i64> [[ADD_I_I]]
// vmlal_high_u32: high-half extract + umull.v2i64 + accumulate add, per the CHECK lines above.
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}
8959
8960// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8961// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
8962// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8963// CHECK:   ret <8 x i16> [[SUB_I]]
// vmlsl_s8: widening multiply-subtract; CHECK lines above pin smull.v8i16 + sub.
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
8967// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8968// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8969// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8970// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
8971// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
8972// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
8973// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8974// CHECK:   ret <4 x i32> [[SUB_I]]
// vmlsl_s16: widening multiply-subtract; CHECK lines above pin smull.v4i32 + sub.
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
8978// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
8979// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8980// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8981// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
8982// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
8983// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
8984// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8985// CHECK:   ret <2 x i64> [[SUB_I]]
// Widening multiply-subtract: expects smull.v2i64 then a `sub` accumulate (see CHECK lines above).
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
8989// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
8990// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
8991// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8992// CHECK:   ret <8 x i16> [[SUB_I]]
// Unsigned widening multiply-subtract: expects umull.v8i16 then a `sub` (see CHECK lines above).
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
8996// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
8997// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8998// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8999// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9000// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9001// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
9002// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
9003// CHECK:   ret <4 x i32> [[SUB_I]]
// Unsigned widening multiply-subtract: expects umull.v4i32 then a `sub` (see CHECK lines above).
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
9007// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
9008// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9009// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9010// CHECK:   [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9011// CHECK:   [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9012// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
9013// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
9014// CHECK:   ret <2 x i64> [[SUB_I]]
// Unsigned widening multiply-subtract: expects umull.v2i64 then a `sub` (see CHECK lines above).
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
9018
9019// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
9020// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9021// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9022// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
9023// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
9024// CHECK:   ret <8 x i16> [[SUB_I_I]]
// High-half variant: expects shufflevectors of lanes 8-15 feeding smull.v8i16, then a `sub` (see CHECK lines above).
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
9028// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
9029// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9030// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9031// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
9032// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
9033// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9034// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9035// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
9036// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
9037// CHECK:   ret <4 x i32> [[SUB_I_I]]
// High-half variant: expects shufflevectors of lanes 4-7 feeding smull.v4i32, then a `sub` (see CHECK lines above).
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
9041// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
9042// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
9043// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
9044// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
9045// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
9046// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9047// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9048// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
9049// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
9050// CHECK:   ret <2 x i64> [[SUB_I_I]]
// High-half variant: expects shufflevectors of lanes 2-3 feeding smull.v2i64, then a `sub` (see CHECK lines above).
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
9054// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
9055// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9056// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9057// CHECK:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
9058// CHECK:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
9059// CHECK:   ret <8 x i16> [[SUB_I_I]]
// High-half variant: expects shufflevectors of lanes 8-15 feeding umull.v8i16, then a `sub` (see CHECK lines above).
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
9063// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
9064// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9065// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9066// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
9067// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
9068// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9069// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9070// CHECK:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
9071// CHECK:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
9072// CHECK:   ret <4 x i32> [[SUB_I_I]]
// High-half variant: expects shufflevectors of lanes 4-7 feeding umull.v4i32, then a `sub` (see CHECK lines above).
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
9076// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
9077// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
9078// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
9079// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
9080// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
9081// CHECK:   [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9082// CHECK:   [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9083// CHECK:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
9084// CHECK:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
9085// CHECK:   ret <2 x i64> [[SUB_I_I]]
// High-half variant: expects shufflevectors of lanes 2-3 feeding umull.v2i64, then a `sub` (see CHECK lines above).
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}
9089
9090// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
9091// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
9092// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9093// CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9094// CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9095// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
9096// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
9097// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
9098// CHECK:   ret <4 x i32> [[TMP2]]
// Saturating doubling widening multiply: expects a single sqdmull.v4i32 call (see CHECK lines above).
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
9102// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
9103// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
9104// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9105// CHECK:   [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9106// CHECK:   [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9107// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
9108// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
9109// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
9110// CHECK:   ret <2 x i64> [[TMP2]]
// Saturating doubling widening multiply: expects a single sqdmull.v2i64 call (see CHECK lines above).
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
9114
9115// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
9116// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9117// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9118// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9119// CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9120// CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
9121// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
9122// CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
9123// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
9124// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
// Saturating doubling multiply-accumulate: expects sqdmull.v4i32 then saturating sqadd.v4i32 (see CHECK lines above).
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}
9128
9129// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
9130// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9131// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9132// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9133// CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9134// CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
9135// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
9136// CHECK:   [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
9137// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
9138// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
// Saturating doubling multiply-accumulate: expects sqdmull.v2i64 then saturating sqadd.v2i64 (see CHECK lines above).
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}
9142
9143// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
9144// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9145// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9146// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9147// CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9148// CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
9149// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
9150// CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
9151// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
9152// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
// Saturating doubling multiply-subtract: expects sqdmull.v4i32 then saturating sqsub.v4i32 (see CHECK lines above).
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}
9156
9157// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
9158// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9159// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9160// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9161// CHECK:   [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9162// CHECK:   [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
9163// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
9164// CHECK:   [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
9165// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
9166// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
// Saturating doubling multiply-subtract: expects sqdmull.v2i64 then saturating sqsub.v2i64 (see CHECK lines above).
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
9170
9171// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
9172// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9173// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9174// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
9175// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
9176// CHECK:   [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9177// CHECK:   [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9178// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4
9179// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
9180// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
9181// CHECK:   ret <4 x i32> [[TMP2]]
// High-half variant: expects shufflevectors of lanes 4-7 feeding sqdmull.v4i32 (see CHECK lines above).
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
9185// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
9186// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
9187// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
9188// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
9189// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
9190// CHECK:   [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9191// CHECK:   [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9192// CHECK:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4
9193// CHECK:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
9194// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
9195// CHECK:   ret <2 x i64> [[TMP2]]
// High-half variant: expects shufflevectors of lanes 2-3 feeding sqdmull.v2i64 (see CHECK lines above).
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}
9199
9200// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
9201// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9202// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9203// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9204// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
9205// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
9206// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9207// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
9208// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
9209// CHECK:   [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
9210// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
9211// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
// High-half variant: expects lanes 4-7 shuffles, sqdmull.v4i32, then saturating sqadd.v4i32 (see CHECK lines above).
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}
9215
9216// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
9217// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
9218// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
9219// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9220// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
9221// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
9222// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9223// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
9224// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
9225// CHECK:   [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
9226// CHECK:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
9227// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
// High-half variant: expects lanes 2-3 shuffles, sqdmull.v2i64, then saturating sqadd.v2i64 (see CHECK lines above).
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}
9231
9232// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
9233// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9234// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
9235// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9236// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
9237// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
9238// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
9239// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
9240// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
9241// CHECK:   [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
9242// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
9243// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
// High-half variant: expects lanes 4-7 shuffles, sqdmull.v4i32, then saturating sqsub.v4i32 (see CHECK lines above).
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}
9247
9248// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
9249// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
9250// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
9251// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9252// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
9253// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
9254// CHECK:   [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
9255// CHECK:   [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
9256// CHECK:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
9257// CHECK:   [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
9258// CHECK:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
9259// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
// High-half variant: expects lanes 2-3 shuffles, sqdmull.v2i64, then saturating sqsub.v2i64 (see CHECK lines above).
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}
9263
9264// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
9265// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
9266// CHECK:   ret <8 x i16> [[VMULL_I]]
// Polynomial multiply: expects a single pmull.v8i16 call (see CHECK lines above).
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}
9270
9271// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 {
9272// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9273// CHECK:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9274// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
9275// CHECK:   ret <8 x i16> [[VMULL_I_I]]
// High-half polynomial multiply: expects lanes 8-15 shuffles feeding pmull.v8i16 (see CHECK lines above).
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}
9279
9280// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 {
9281// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
9282// CHECK:   ret i64 [[VADDD_I]]
// Scalar d-register add: expects a plain i64 `add`, no intrinsic call (see CHECK lines above).
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}
9286
9287// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 {
9288// CHECK:   [[VADDD_I:%.*]] = add i64 %a, %b
9289// CHECK:   ret i64 [[VADDD_I]]
// Scalar d-register add (unsigned): expects a plain i64 `add` (see CHECK lines above).
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}
9293
9294// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 {
9295// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
9296// CHECK:   ret i64 [[VSUBD_I]]
// Scalar d-register subtract: expects a plain i64 `sub` (see CHECK lines above).
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}
9300
9301// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 {
9302// CHECK:   [[VSUBD_I:%.*]] = sub i64 %a, %b
9303// CHECK:   ret i64 [[VSUBD_I]]
// Scalar d-register subtract (unsigned): expects a plain i64 `sub` (see CHECK lines above).
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}
9307
9308// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 {
9309// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
9310// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
9311// CHECK:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
9312// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
9313// CHECK:   ret i8 [[TMP2]]
// 8-bit saturating scalar add: goes via lane-0 insert, vector sqadd.v8i8, lane-0 extract (see CHECK lines above).
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}
9317
9318// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 {
9319// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9320// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9321// CHECK:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9322// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
9323// CHECK:   ret i16 [[TMP2]]
// 16-bit saturating scalar add: goes via lane-0 insert, vector sqadd.v4i16, lane-0 extract (see CHECK lines above).
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}
9327
9328// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 {
9329// CHECK:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4
9330// CHECK:   ret i32 [[VQADDS_S32_I]]
// 32-bit saturating scalar add: maps directly to the scalar sqadd.i32 intrinsic (see CHECK lines above).
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}
9334
9335// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
9336// CHECK:   [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
9337// CHECK:   ret i64 [[VQADDD_S64_I]]
// 64-bit saturating scalar add: maps directly to the scalar sqadd.i64 intrinsic (see CHECK lines above).
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}
9341
9342// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
9343// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
9344// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
9345// CHECK:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
9346// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
9347// CHECK:   ret i8 [[TMP2]]
// 8-bit unsigned saturating scalar add: lane-0 insert, vector uqadd.v8i8, lane-0 extract (see CHECK lines above).
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}
9351
9352// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
9353// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9354// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9355// CHECK:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9356// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
9357// CHECK:   ret i16 [[TMP2]]
// 16-bit unsigned saturating scalar add: lane-0 insert, vector uqadd.v4i16, lane-0 extract (see CHECK lines above).
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}
9361
9362// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
9363// CHECK:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
9364// CHECK:   ret i32 [[VQADDS_U32_I]]
// 32-bit unsigned saturating scalar add: maps directly to the scalar uqadd.i32 intrinsic (see CHECK lines above).
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}
9368
9369// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
9370// CHECK:   [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
9371// CHECK:   ret i64 [[VQADDD_U64_I]]
// 64-bit unsigned saturating scalar add: maps directly to the scalar uqadd.i64 intrinsic (see CHECK lines above).
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}
9375
9376// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
9377// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
9378// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
9379// CHECK:   [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
9380// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
9381// CHECK:   ret i8 [[TMP2]]
// 8-bit saturating scalar subtract: lane-0 insert, vector sqsub.v8i8, lane-0 extract (see CHECK lines above).
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}
9385
9386// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
9387// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9388// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9389// CHECK:   [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9390// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
9391// CHECK:   ret i16 [[TMP2]]
// 16-bit saturating scalar subtract: lane-0 insert, vector sqsub.v4i16, lane-0 extract (see CHECK lines above).
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}
9395
9396// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
9397// CHECK:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
9398// CHECK:   ret i32 [[VQSUBS_S32_I]]
// 32-bit saturating scalar subtract: maps directly to the scalar sqsub.i32 intrinsic (see CHECK lines above).
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}
9402
9403// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
9404// CHECK:   [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
9405// CHECK:   ret i64 [[VQSUBD_S64_I]]
// 64-bit saturating scalar subtract: maps directly to the scalar sqsub.i64 intrinsic (see CHECK lines above).
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}
9409
9410// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
9411// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
9412// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
9413// CHECK:   [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
9414// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
9415// CHECK:   ret i8 [[TMP2]]
// 8-bit unsigned saturating scalar subtract: lane-0 insert, vector uqsub.v8i8, lane-0 extract (see CHECK lines above).
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}
9419
9420// CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
9421// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9422// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9423// CHECK:   [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9424// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
9425// CHECK:   ret i16 [[TMP2]]
// 16-bit unsigned saturating scalar subtract: lane-0 insert, vector uqsub.v4i16, lane-0 extract (see CHECK lines above).
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}
9429
9430// CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
9431// CHECK:   [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
9432// CHECK:   ret i32 [[VQSUBS_U32_I]]
// 32-bit unsigned saturating scalar subtract: maps directly to the scalar uqsub.i32 intrinsic (see CHECK lines above).
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}
9436
9437// CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
9438// CHECK:   [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
9439// CHECK:   ret i64 [[VQSUBD_U64_I]]
// 64-bit unsigned saturating scalar subtract: maps directly to the scalar uqsub.i64 intrinsic (see CHECK lines above).
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}
9443
9444// CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 {
9445// CHECK:   [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4
9446// CHECK:   ret i64 [[VSHLD_S64_I]]
// Scalar variable shift: maps directly to the scalar sshl.i64 intrinsic (see CHECK lines above).
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}
9450
9451// CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 {
9452// CHECK:   [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4
9453// CHECK:   ret i64 [[VSHLD_U64_I]]
// Scalar variable shift (unsigned): maps directly to the scalar ushl.i64 intrinsic (see CHECK lines above).
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}
9457
9458// CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 {
9459// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
9460// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
9461// CHECK:   [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
9462// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
9463// CHECK:   ret i8 [[TMP2]]
// 8-bit saturating scalar shift: lane-0 insert, vector sqshl.v8i8, lane-0 extract (see CHECK lines above).
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}
9467
9468// CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 {
9469// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9470// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9471// CHECK:   [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9472// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
9473// CHECK:   ret i16 [[TMP2]]
// 16-bit saturating scalar shift: lane-0 insert, vector sqshl.v4i16, lane-0 extract (see CHECK lines above).
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}
9477
9478// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
9479// CHECK:   [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
9480// CHECK:   ret i32 [[VQSHLS_S32_I]]
// 32-bit saturating scalar shift: maps directly to the scalar sqshl.i32 intrinsic (see CHECK lines above).
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}
9484
9485// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
9486// CHECK:   [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
9487// CHECK:   ret i64 [[VQSHLD_S64_I]]
// 64-bit saturating scalar shift: maps directly to the scalar sqshl.i64 intrinsic (see CHECK lines above).
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}
9491
9492// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
9493// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
9494// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
9495// CHECK:   [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
9496// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
9497// CHECK:   ret i8 [[TMP2]]
9498uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
9499  return vqshlb_u8(a, b);
9500}
9501
9502// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
9503// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
9504// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
9505// CHECK:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
9506// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
9507// CHECK:   ret i16 [[TMP2]]
9508uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
9509  return vqshlh_u16(a, b);
9510}
9511
9512// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
9513// CHECK:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
9514// CHECK:   ret i32 [[VQSHLS_U32_I]]
9515uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
9516  return vqshls_u32(a, b);
9517}
9518
9519// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
9520// CHECK:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
9521// CHECK:   ret i64 [[VQSHLD_U64_I]]
9522uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
9523  return vqshld_u64(a, b);
9524}
9525
// Scalar rounding shift left: 64-bit only, lowered straight to the scalar
// srshl/urshl intrinsics with no vector round-trip.
// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}


// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}
9540
// Scalar saturating rounding shift left (vqrshlb/h/s/d).  Same lowering shape
// as the non-rounding vqshl tests above: i8/i16 go through lane 0 of a vector
// sqrshl/uqrshl intrinsic, i32/i64 use scalar intrinsics directly.
// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK:   ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
// CHECK:   ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}
9608
// Pairwise-add reductions of a two-element vector down to a scalar.
// The i64 variant is expected to lower to the across-vector add (uaddv);
// the FP variants are expected to be open-coded as extract lane 0, extract
// lane 1, fadd — no intrinsic call.
// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK:   ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
// CHECK:   [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
// CHECK:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK:   ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}
9639
// Pairwise FP max/min reductions (vpmax[nm]s/qd, vpmin[nm]s/qd): each reduces
// a two-element vector to a scalar via the corresponding across-vector
// fmaxnmv/fmaxv/fminnmv/fminv intrinsic, after a round-trip bitcast through
// the byte-vector type.
// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK:   ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK:   ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}
9711
// Scalar saturating (rounding) doubling multiply-high.  The i16 variants use
// the lane-0-of-a-vector lowering (insert, vector sqdmulh/sqrdmulh, extract);
// the i32 variants call the scalar intrinsics directly.
// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK:   ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
// CHECK:   ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}
9745
// FP multiply-extended (FMULX): scalar f32/f64 variants call the scalar
// fmulx intrinsics directly; the v1f64 vector variant bitcasts both operands
// through <8 x i8> before calling the v1f64 intrinsic.
// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
// CHECK:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
// CHECK:   ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
// CHECK:   ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK:   [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK:   [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK:   [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
// CHECK:   ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}
9770
// Scalar Newton-Raphson step intrinsics: frecps (reciprocal step) and
// frsqrts (reciprocal square-root step), f32 and f64 variants.
// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
// CHECK:   [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
// CHECK:   ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
// CHECK:   [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
// CHECK:   ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
// CHECK:   [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
// CHECK:   ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
// CHECK:   [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
// CHECK:   ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}
9798
// Scalar integer-to-FP conversions: these need no target intrinsic and are
// expected to lower to plain sitofp/uitofp IR instructions.
// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK:   ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
// CHECK:   [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK:   ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}
9826
// Scalar FP reciprocal estimate (frecpe), f32 and f64 variants.
// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
// CHECK:   [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
// CHECK:   ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
// CHECK:   [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
// CHECK:   ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}
9840
9841// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
9842// CHECK:   [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
9843// CHECK:   ret float [[VRECPXS_F32_I]]
9844float32_t test_vrecpxs_f32(float32_t a) {
9845  return vrecpxs_f32(a);
9846 }
9847
// Scalar FP reciprocal exponent (frecpx), f64 variant.
// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
// CHECK:   [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
// CHECK:   ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}
9854
// Reciprocal square-root estimates: unsigned integer vector forms (ursqrte,
// with the usual byte-vector bitcast round-trip) and scalar FP forms (frsqrte).
// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
// CHECK:   [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
// CHECK:   ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
// CHECK:   [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
// CHECK:   ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}
9886
// 128-bit vld1q loads.  Each is expected to lower to a plain vector load
// through a bitcast pointer (no intrinsic call); i8/p8 elements skip the
// intermediate i8* cast, and f16 loads as <8 x i16> then bitcasts to half.
// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK:   [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// f16 has no native IR load here: loaded as <8 x i16>, then bitcast to half.
// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
// CHECK:   ret <8 x half> [[TMP3]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
// CHECK:   ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK:   [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
// CHECK:   ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK:   ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}
10001
// 64-bit vld1 loads: same lowering shape as the vld1q tests above, with
// half-width vector types.
// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK:   ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK:   [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK:   ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
// CHECK:   ret <4 x half> [[TMP3]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
// CHECK:   ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK:   [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
// CHECK:   ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK:   ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}
10116
10117// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
10118// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
10119// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
10120// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
10121// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10122// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10123// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
10124// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
10125// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
10126// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
10127// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
10128// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
10129// CHECK:   ret %struct.uint8x16x2_t [[TMP5]]
10130uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
10131  return vld2q_u8(a);
10132}
10133
10134// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
10135// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
10136// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
10137// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
10138// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10139// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10140// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10141// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10142// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10143// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
10144// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
10145// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10146// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
10147// CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
10148uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
10149  return vld2q_u16(a);
10150}
10151
10152// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
10153// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
10154// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
10155// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
10156// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10157// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10158// CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10159// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
10160// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
10161// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
10162// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
10163// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10164// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
10165// CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
// Verify vld2q_u32 lowers to @llvm.aarch64.neon.ld2.v4i32 returning a
// uint32x4x2_t aggregate (asserted by the FileCheck lines above).
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
  return vld2q_u32(a);
}
10169
10170// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
10171// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
10172// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
10173// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
10174// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10175// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10176// CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10177// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
10178// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
10179// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
10180// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
10181// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10182// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
10183// CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
// Verify vld2q_u64 lowers to @llvm.aarch64.neon.ld2.v2i64 returning a
// uint64x2x2_t aggregate (asserted by the FileCheck lines above).
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
  return vld2q_u64(a);
}
10187
10188// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
10189// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
10190// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
10191// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
10192// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10193// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10194// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
10195// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
10196// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
10197// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
10198// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
10199// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
10200// CHECK:   ret %struct.int8x16x2_t [[TMP5]]
// Verify vld2q_s8 lowers to @llvm.aarch64.neon.ld2.v16i8 returning an
// int8x16x2_t aggregate; no i8* pre-cast is needed since the pointee is
// already i8 (asserted by the FileCheck lines above).
int8x16x2_t test_vld2q_s8(int8_t const *a) {
  return vld2q_s8(a);
}
10204
10205// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
10206// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
10207// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
10208// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
10209// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10210// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10211// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10212// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10213// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10214// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
10215// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
10216// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10217// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
10218// CHECK:   ret %struct.int16x8x2_t [[TMP6]]
// Verify vld2q_s16 lowers to @llvm.aarch64.neon.ld2.v8i16 returning an
// int16x8x2_t aggregate (asserted by the FileCheck lines above).
int16x8x2_t test_vld2q_s16(int16_t const *a) {
  return vld2q_s16(a);
}
10222
10223// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
10224// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
10225// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
10226// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
10227// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10228// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10229// CHECK:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10230// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
10231// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
10232// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
10233// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
10234// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10235// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
10236// CHECK:   ret %struct.int32x4x2_t [[TMP6]]
// Verify vld2q_s32 lowers to @llvm.aarch64.neon.ld2.v4i32 returning an
// int32x4x2_t aggregate (asserted by the FileCheck lines above).
int32x4x2_t test_vld2q_s32(int32_t const *a) {
  return vld2q_s32(a);
}
10240
10241// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
10242// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
10243// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
10244// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
10245// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10246// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10247// CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10248// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
10249// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
10250// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
10251// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
10252// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10253// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
10254// CHECK:   ret %struct.int64x2x2_t [[TMP6]]
// Verify vld2q_s64 lowers to @llvm.aarch64.neon.ld2.v2i64 returning an
// int64x2x2_t aggregate (asserted by the FileCheck lines above).
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}
10258
10259// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
10260// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
10261// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
10262// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
10263// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10264// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10265// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10266// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10267// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10268// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
10269// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
10270// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10271// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
10272// CHECK:   ret %struct.float16x8x2_t [[TMP6]]
// Verify vld2q_f16 lowers to @llvm.aarch64.neon.ld2.v8i16 — half elements are
// loaded as <8 x i16> vectors at this IR level — returning a float16x8x2_t
// aggregate (asserted by the FileCheck lines above).
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}
10276
10277// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
10278// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
10279// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
10280// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
10281// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10282// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10283// CHECK:   [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10284// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
10285// CHECK:   store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
10286// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
10287// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
10288// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10289// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
10290// CHECK:   ret %struct.float32x4x2_t [[TMP6]]
// Verify vld2q_f32 lowers to @llvm.aarch64.neon.ld2.v4f32 returning a
// float32x4x2_t aggregate (asserted by the FileCheck lines above).
float32x4x2_t test_vld2q_f32(float32_t const *a) {
  return vld2q_f32(a);
}
10294
10295// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 {
10296// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
10297// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
10298// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
10299// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10300// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10301// CHECK:   [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10302// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
10303// CHECK:   store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
10304// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
10305// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
10306// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10307// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
10308// CHECK:   ret %struct.float64x2x2_t [[TMP6]]
// Verify vld2q_f64 lowers to @llvm.aarch64.neon.ld2.v2f64 returning a
// float64x2x2_t aggregate (asserted by the FileCheck lines above).
float64x2x2_t test_vld2q_f64(float64_t const *a) {
  return vld2q_f64(a);
}
10312
10313// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
10314// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
10315// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
10316// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
10317// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10318// CHECK:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10319// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
10320// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
10321// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
10322// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
10323// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
10324// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
10325// CHECK:   ret %struct.poly8x16x2_t [[TMP5]]
// Verify vld2q_p8 lowers to @llvm.aarch64.neon.ld2.v16i8 returning a
// poly8x16x2_t aggregate; polynomial and integer loads share the same
// intrinsic (asserted by the FileCheck lines above).
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
  return vld2q_p8(a);
}
10329
10330// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
10331// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
10332// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
10333// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
10334// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10335// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10336// CHECK:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10337// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
10338// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
10339// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
10340// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
10341// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
10342// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
10343// CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
// Verify vld2q_p16 lowers to @llvm.aarch64.neon.ld2.v8i16 returning a
// poly16x8x2_t aggregate (asserted by the FileCheck lines above).
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
  return vld2q_p16(a);
}
10347
10348// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
10349// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
10350// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
10351// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
10352// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10353// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10354// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
10355// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
10356// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
10357// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
10358// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
10359// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
10360// CHECK:   ret %struct.uint8x8x2_t [[TMP5]]
// Verify the 64-bit variant vld2_u8 lowers to @llvm.aarch64.neon.ld2.v8i8
// returning a uint8x8x2_t aggregate with 8-byte alignment (asserted by the
// FileCheck lines above).
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}
10364
10365// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
10366// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
10367// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
10368// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
10369// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10370// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10371// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10372// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10373// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10374// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
10375// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
10376// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10377// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
10378// CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
// Verify vld2_u16 lowers to @llvm.aarch64.neon.ld2.v4i16 returning a
// uint16x4x2_t aggregate (asserted by the FileCheck lines above).
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}
10382
10383// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
10384// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
10385// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
10386// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
10387// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10388// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10389// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10390// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
10391// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
10392// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
10393// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
10394// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10395// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
10396// CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
// Verify vld2_u32 lowers to @llvm.aarch64.neon.ld2.v2i32 returning a
// uint32x2x2_t aggregate (asserted by the FileCheck lines above).
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}
10400
10401// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
10402// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
10403// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
10404// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
10405// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10406// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10407// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10408// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
10409// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
10410// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
10411// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
10412// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10413// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
10414// CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
// Verify vld2_u64 lowers to @llvm.aarch64.neon.ld2.v1i64 returning a
// uint64x1x2_t aggregate (asserted by the FileCheck lines above).
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}
10418
10419// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
10420// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
10421// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
10422// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
10423// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10424// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10425// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
10426// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
10427// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
10428// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
10429// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
10430// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
10431// CHECK:   ret %struct.int8x8x2_t [[TMP5]]
// Verify vld2_s8 lowers to @llvm.aarch64.neon.ld2.v8i8 returning an
// int8x8x2_t aggregate (asserted by the FileCheck lines above).
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}
10435
10436// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
10437// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
10438// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
10439// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
10440// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10441// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10442// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10443// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10444// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10445// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
10446// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
10447// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10448// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
10449// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
// Verify vld2_s16 lowers to @llvm.aarch64.neon.ld2.v4i16 returning an
// int16x4x2_t aggregate (asserted by the FileCheck lines above).
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}
10453
10454// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
10455// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
10456// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
10457// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
10458// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10459// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10460// CHECK:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10461// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
10462// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
10463// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
10464// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
10465// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10466// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
10467// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
// Verify vld2_s32 lowers to @llvm.aarch64.neon.ld2.v2i32 returning an
// int32x2x2_t aggregate (asserted by the FileCheck lines above).
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}
10471
10472// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
10473// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
10474// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
10475// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
10476// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10477// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10478// CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10479// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
10480// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
10481// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
10482// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
10483// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10484// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
10485// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
// Verify vld2_s64 lowers to @llvm.aarch64.neon.ld2.v1i64 returning an
// int64x1x2_t aggregate (asserted by the FileCheck lines above).
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}
10489
10490// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
10491// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
10492// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
10493// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
10494// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10495// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10496// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10497// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10498// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10499// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
10500// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
10501// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10502// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
10503// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
// Verify vld2_f16 lowers to @llvm.aarch64.neon.ld2.v4i16 — half elements are
// loaded as <4 x i16> vectors at this IR level — returning a float16x4x2_t
// aggregate (asserted by the FileCheck lines above).
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}
10507
10508// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
10509// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
10510// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
10511// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
10512// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10513// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10514// CHECK:   [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10515// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
10516// CHECK:   store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
10517// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
10518// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
10519// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10520// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
10521// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
// Verify vld2_f32 lowers to @llvm.aarch64.neon.ld2.v2f32 returning a
// float32x2x2_t aggregate (asserted by the FileCheck lines above).
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}
10525
10526// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 {
10527// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
10528// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
10529// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
10530// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10531// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10532// CHECK:   [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10533// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
10534// CHECK:   store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
10535// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
10536// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
10537// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10538// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
10539// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
// Verify vld2_f64 lowers to @llvm.aarch64.neon.ld2.v1f64 returning a
// float64x1x2_t aggregate (asserted by the FileCheck lines above).
float64x1x2_t test_vld2_f64(float64_t const *a) {
  return vld2_f64(a);
}
10543
10544// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
10545// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
10546// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
10547// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
10548// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10549// CHECK:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10550// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
10551// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
10552// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
10553// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
10554// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
10555// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
10556// CHECK:   ret %struct.poly8x8x2_t [[TMP5]]
// Verify vld2_p8 lowers to @llvm.aarch64.neon.ld2.v8i8 returning a
// poly8x8x2_t aggregate (asserted by the FileCheck lines above).
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}
10560
10561// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
10562// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
10563// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
10564// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
10565// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10566// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10567// CHECK:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10568// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
10569// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
10570// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
10571// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
10572// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
10573// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
10574// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
// Verify vld2_p16 lowers to @llvm.aarch64.neon.ld2.v4i16 returning a
// poly16x4x2_t aggregate (asserted by the FileCheck lines above).
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
  return vld2_p16(a);
}
10578
10579// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 {
10580// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
10581// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
10582// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
10583// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10584// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10585// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
10586// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10587// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
10588// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
10589// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
10590// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
10591// CHECK:   ret %struct.uint8x16x3_t [[TMP5]]
// Verify the three-element deinterleaving load vld3q_u8 lowers to
// @llvm.aarch64.neon.ld3.v16i8 returning a uint8x16x3_t aggregate — note the
// 48-byte memcpy for the three 16-byte vectors (asserted by the FileCheck
// lines above).
uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
  return vld3q_u8(a);
}
10595
10596// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 {
10597// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
10598// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
10599// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
10600// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10601// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10602// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10603// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10604// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10605// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
10606// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
10607// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10608// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
10609// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
// Verify vld3q_u16 lowers to @llvm.aarch64.neon.ld3.v8i16 returning a
// uint16x8x3_t aggregate (asserted by the FileCheck lines above).
uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
  return vld3q_u16(a);
}
10613
10614// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 {
10615// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
10616// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
10617// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
10618// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10619// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10620// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10621// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
10622// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10623// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
10624// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
10625// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10626// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
10627// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
// Verify vld3q_u32 lowers to @llvm.aarch64.neon.ld3.v4i32 returning a
// uint32x4x3_t aggregate (asserted by the FileCheck lines above).
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
  return vld3q_u32(a);
}
10631
10632// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 {
10633// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
10634// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
10635// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
10636// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10637// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10638// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10639// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
10640// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10641// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
10642// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
10643// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10644// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
10645// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
// Verifies that vld3q_u64 lowers to a single @llvm.aarch64.neon.ld3.v2i64 call.
10646uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
10647  return vld3q_u64(a);
10648}
10649
10650// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
10651// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
10652// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
10653// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
10654// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10655// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10656// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
10657// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10658// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
10659// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
10660// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
10661// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
10662// CHECK:   ret %struct.int8x16x3_t [[TMP5]]
// Verifies that vld3q_s8 lowers to a single @llvm.aarch64.neon.ld3.v16i8 call;
// no extra i8* round-trip bitcast is needed since the pointee is already i8.
10663int8x16x3_t test_vld3q_s8(int8_t const *a) {
10664  return vld3q_s8(a);
10665}
10666
10667// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
10668// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
10669// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
10670// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
10671// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10672// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10673// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10674// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10675// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10676// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
10677// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
10678// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10679// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
10680// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
// Verifies that vld3q_s16 lowers to a single @llvm.aarch64.neon.ld3.v8i16 call.
10681int16x8x3_t test_vld3q_s16(int16_t const *a) {
10682  return vld3q_s16(a);
10683}
10684
10685// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
10686// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
10687// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
10688// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
10689// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10690// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
10691// CHECK:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
10692// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
10693// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
10694// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
10695// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
10696// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10697// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
10698// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
// Verifies that vld3q_s32 lowers to a single @llvm.aarch64.neon.ld3.v4i32 call.
10699int32x4x3_t test_vld3q_s32(int32_t const *a) {
10700  return vld3q_s32(a);
10701}
10702
10703// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 {
10704// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
10705// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
10706// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
10707// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10708// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
10709// CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
10710// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
10711// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
10712// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
10713// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
10714// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10715// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
10716// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
// Verifies that vld3q_s64 lowers to a single @llvm.aarch64.neon.ld3.v2i64 call.
10717int64x2x3_t test_vld3q_s64(int64_t const *a) {
10718  return vld3q_s64(a);
10719}
10720
10721// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
10722// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
10723// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
10724// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
10725// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10726// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10727// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10728// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10729// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10730// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
10731// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
10732// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10733// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
10734// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
// Verifies that vld3q_f16 lowers to @llvm.aarch64.neon.ld3.v8i16 — half
// vectors are loaded as <8 x i16> at the IR level (see CHECK lines above).
10735float16x8x3_t test_vld3q_f16(float16_t const *a) {
10736  return vld3q_f16(a);
10737}
10738
10739// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
10740// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
10741// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
10742// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
10743// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10744// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
10745// CHECK:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
10746// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
10747// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
10748// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
10749// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
10750// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10751// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
10752// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
// Verifies that vld3q_f32 lowers to a single @llvm.aarch64.neon.ld3.v4f32 call.
10753float32x4x3_t test_vld3q_f32(float32_t const *a) {
10754  return vld3q_f32(a);
10755}
10756
10757// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 {
10758// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
10759// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
10760// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
10761// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10762// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
10763// CHECK:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
10764// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
10765// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
10766// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
10767// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
10768// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10769// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
10770// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
// Verifies that vld3q_f64 lowers to a single @llvm.aarch64.neon.ld3.v2f64 call
// (float64 intrinsics are AArch64-only).
10771float64x2x3_t test_vld3q_f64(float64_t const *a) {
10772  return vld3q_f64(a);
10773}
10774
10775// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
10776// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
10777// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
10778// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
10779// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
10780// CHECK:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
10781// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
10782// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
10783// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
10784// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
10785// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
10786// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
10787// CHECK:   ret %struct.poly8x16x3_t [[TMP5]]
// Verifies that vld3q_p8 lowers to @llvm.aarch64.neon.ld3.v16i8 — poly8
// shares the i8 element lowering with the s8/u8 variants.
10788poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
10789  return vld3q_p8(a);
10790}
10791
10792// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
10793// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
10794// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
10795// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
10796// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10797// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
10798// CHECK:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
10799// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
10800// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
10801// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
10802// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
10803// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
10804// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
10805// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
// Verifies that vld3q_p16 lowers to @llvm.aarch64.neon.ld3.v8i16.
10806poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
10807  return vld3q_p16(a);
10808}
10809
10810// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
10811// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
10812// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
10813// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
10814// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10815// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10816// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10817// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10818// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
10819// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
10820// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
10821// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
10822// CHECK:   ret %struct.uint8x8x3_t [[TMP5]]
// Verifies that the 64-bit (D-register) vld3_u8 lowers to
// @llvm.aarch64.neon.ld3.v8i8; note the 24-byte/8-aligned memcpy vs. the
// 48-byte/16-aligned one used by the q-form variants.
10823uint8x8x3_t test_vld3_u8(uint8_t const *a) {
10824  return vld3_u8(a);
10825}
10826
10827// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
10828// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
10829// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
10830// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
10831// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10832// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10833// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10834// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10835// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10836// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
10837// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
10838// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10839// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
10840// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
// Verifies that vld3_u16 lowers to a single @llvm.aarch64.neon.ld3.v4i16 call.
10841uint16x4x3_t test_vld3_u16(uint16_t const *a) {
10842  return vld3_u16(a);
10843}
10844
10845// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
10846// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
10847// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
10848// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
10849// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10850// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10851// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10852// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
10853// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10854// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
10855// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
10856// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10857// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
10858// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
// Verifies that vld3_u32 lowers to a single @llvm.aarch64.neon.ld3.v2i32 call.
10859uint32x2x3_t test_vld3_u32(uint32_t const *a) {
10860  return vld3_u32(a);
10861}
10862
10863// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
10864// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
10865// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
10866// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
10867// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10868// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10869// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10870// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
10871// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10872// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
10873// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
10874// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10875// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
10876// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
// Verifies that vld3_u64 lowers to @llvm.aarch64.neon.ld3.v1i64 (single-lane
// <1 x i64> vectors in the D-register form).
10877uint64x1x3_t test_vld3_u64(uint64_t const *a) {
10878  return vld3_u64(a);
10879}
10880
10881// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
10882// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
10883// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
10884// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
10885// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
10886// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
10887// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
10888// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
10889// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
10890// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
10891// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
10892// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
10893// CHECK:   ret %struct.int8x8x3_t [[TMP5]]
// Verifies that vld3_s8 lowers to a single @llvm.aarch64.neon.ld3.v8i8 call.
10894int8x8x3_t test_vld3_s8(int8_t const *a) {
10895  return vld3_s8(a);
10896}
10897
10898// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
10899// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
10900// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
10901// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
10902// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
10903// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10904// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10905// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10906// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10907// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
10908// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
10909// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10910// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
10911// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
// Verifies that vld3_s16 lowers to a single @llvm.aarch64.neon.ld3.v4i16 call.
10912int16x4x3_t test_vld3_s16(int16_t const *a) {
10913  return vld3_s16(a);
10914}
10915
10916// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
10917// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
10918// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
10919// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
10920// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
10921// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
10922// CHECK:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
10923// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
10924// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
10925// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
10926// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
10927// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10928// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
10929// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
// Verifies that vld3_s32 lowers to a single @llvm.aarch64.neon.ld3.v2i32 call.
10930int32x2x3_t test_vld3_s32(int32_t const *a) {
10931  return vld3_s32(a);
10932}
10933
10934// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
10935// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
10936// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
10937// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
10938// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
10939// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
10940// CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
10941// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
10942// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
10943// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
10944// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
10945// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10946// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
10947// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
// Verifies that vld3_s64 lowers to a single @llvm.aarch64.neon.ld3.v1i64 call.
10948int64x1x3_t test_vld3_s64(int64_t const *a) {
10949  return vld3_s64(a);
10950}
10951
10952// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
10953// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
10954// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
10955// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
10956// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
10957// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
10958// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
10959// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
10960// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
10961// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
10962// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
10963// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10964// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
10965// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
// Verifies that vld3_f16 lowers to @llvm.aarch64.neon.ld3.v4i16 — half
// vectors are handled as <4 x i16> at the IR level (see CHECK lines above).
10966float16x4x3_t test_vld3_f16(float16_t const *a) {
10967  return vld3_f16(a);
10968}
10969
10970// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
10971// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
10972// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
10973// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
10974// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
10975// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
10976// CHECK:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
10977// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
10978// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
10979// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
10980// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
10981// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
10982// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
10983// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
// Verifies that vld3_f32 lowers to a single @llvm.aarch64.neon.ld3.v2f32 call.
10984float32x2x3_t test_vld3_f32(float32_t const *a) {
10985  return vld3_f32(a);
10986}
10987
10988// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 {
10989// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
10990// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
10991// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
10992// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
10993// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
10994// CHECK:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
10995// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
10996// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
10997// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
10998// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
10999// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
11000// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
11001// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
// Verifies that vld3_f64 lowers to @llvm.aarch64.neon.ld3.v1f64 (single-lane
// <1 x double> vectors; float64 intrinsics are AArch64-only).
11002float64x1x3_t test_vld3_f64(float64_t const *a) {
11003  return vld3_f64(a);
11004}
11005
11006// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
11007// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
11008// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
11009// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
11010// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
11011// CHECK:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
11012// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
11013// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
11014// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
11015// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
11016// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
11017// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
11018// CHECK:   ret %struct.poly8x8x3_t [[TMP5]]
// Verifies that vld3_p8 lowers to @llvm.aarch64.neon.ld3.v8i8.
11019poly8x8x3_t test_vld3_p8(poly8_t const *a) {
11020  return vld3_p8(a);
11021}
11022
11023// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
11024// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
11025// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
11026// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
11027// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
11028// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
11029// CHECK:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
11030// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
11031// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
11032// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
11033// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
11034// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
11035// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
11036// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
// Verifies that vld3_p16 lowers to @llvm.aarch64.neon.ld3.v4i16.
11037poly16x4x3_t test_vld3_p16(poly16_t const *a) {
11038  return vld3_p16(a);
11039}
11040
11041// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
11042// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
11043// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
11044// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
11045// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
11046// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
11047// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
11048// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
11049// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
11050// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
11051// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
11052// CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
11053// CHECK:   ret %struct.uint8x16x4_t [[TMP5]]
// First of the 4-element de-interleaving loads: verifies that vld4q_u8 lowers
// to a single @llvm.aarch64.neon.ld4.v16i8 call (64-byte result copy).
11054uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
11055  return vld4q_u8(a);
11056}
11057
11058// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
11059// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
11060// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
11061// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
11062// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
11063// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
11064// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
11065// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
11066// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
11067// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
11068// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
11069// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
11070// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
11071// CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
// Verifies that vld4q_u16 lowers to a single @llvm.aarch64.neon.ld4.v8i16 call.
11072uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
11073  return vld4q_u16(a);
11074}
11075
11076// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 {
11077// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
11078// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
11079// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
11080// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
11081// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
11082// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
11083// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
11084// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
11085// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
11086// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
11087// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
11088// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
11089// CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
11090uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
11091  return vld4q_u32(a);
11092}
11093
11094// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 {
11095// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
11096// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
11097// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
11098// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
11099// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
11100// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
11101// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
11102// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
11103// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
11104// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
11105// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
11106// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
11107// CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
11108uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
11109  return vld4q_u64(a);
11110}
11111
// vld4q on signed element types. The i8 variant needs no pointer round-trip
// through i8* (the argument already is i8*), so its pattern is one bitcast
// shorter than the wider-element variants. Generated directives; keep in
// sync with Clang's IR output rather than editing by hand.
// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}
11182
// vld4q on floating-point element types. Note that f16 is loaded as
// <8 x i16> (half vectors are handled as integer bit patterns at this point
// in the lowering), while f32/f64 use genuine float/double vector types.
// Generated directives; regenerate rather than hand-edit.
// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK:   [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK:   [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
// CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}
11236
// vld4q on polynomial element types; IR patterns are identical to the
// same-width integer variants (poly types carry no distinct IR type).
// Generated directives; regenerate rather than hand-edit.
// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
// CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}
11271
// 64-bit (D-register) vld4 on unsigned element types: same shape as the
// Q-register variants above but with half-width vectors, a 32-byte memcpy
// and 8-byte alignment. Generated directives; do not hand-edit.
// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}

// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}

// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}

// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}
11342
// 64-bit vld4 on signed element types; mirrors the unsigned D-register
// variants above. Generated directives; do not hand-edit.
// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}

// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}

// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}

// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}
11413
// 64-bit vld4 on floating-point element types; f16 again loads as a
// <4 x i16> bit-pattern vector while f32/f64 use true FP vector types.
// Generated directives; do not hand-edit.
// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK:   [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK:   [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}
11467
// 64-bit vld4 on polynomial element types; identical IR to the same-width
// integer variants. Generated directives; do not hand-edit.
// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}
11502
11503// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
11504// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
11505// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
11506// CHECK:   ret void
11507void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
11508  vst1q_u8(a, b);
11509}
11510
11511// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
11512// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11513// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
11514// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11515// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11516// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11517// CHECK:   ret void
11518void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
11519  vst1q_u16(a, b);
11520}
11521
11522// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
11523// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
11524// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
11525// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
11526// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
11527// CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
11528// CHECK:   ret void
// vst1q_u32 must lower to a <4 x i32> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}
11532
11533// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
11534// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
11535// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
11536// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
11537// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
11538// CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
11539// CHECK:   ret void
// vst1q_u64 must lower to a <2 x i64> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}
11543
11544// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
11545// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
11546// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
11547// CHECK:   ret void
// vst1q_s8 must lower to a plain <16 x i8> store — same IR as the unsigned
// variant (pinned by the preceding IR-match lines).
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}
11551
11552// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
11553// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11554// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
11555// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11556// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11557// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11558// CHECK:   ret void
// vst1q_s16 must lower to a <8 x i16> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}
11562
11563// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
11564// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
11565// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
11566// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
11567// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
11568// CHECK:   store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
11569// CHECK:   ret void
// vst1q_s32 must lower to a <4 x i32> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}
11573
11574// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
11575// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
11576// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
11577// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
11578// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
11579// CHECK:   store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
11580// CHECK:   ret void
// vst1q_s64 must lower to a <2 x i64> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}
11584
11585// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
11586// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
11587// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
11588// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11589// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11590// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11591// CHECK:   ret void
// vst1q_f16 must lower to a store of the half vector reinterpreted as
// <8 x i16> (pinned by the preceding IR-match lines).
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}
11595
11596// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
11597// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
11598// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
11599// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
11600// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
11601// CHECK:   store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
11602// CHECK:   ret void
// vst1q_f32 must lower to a <4 x float> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}
11606
11607// CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 {
11608// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
11609// CHECK:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
11610// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
11611// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
11612// CHECK:   store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
11613// CHECK:   ret void
// vst1q_f64 must lower to a <2 x double> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}
11617
11618// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
11619// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
11620// CHECK:   store <16 x i8> %b, <16 x i8>* [[TMP0]]
11621// CHECK:   ret void
// vst1q_p8 must lower to a plain <16 x i8> store — same IR as the integer
// 8-bit variants (pinned by the preceding IR-match lines).
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}
11625
11626// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
11627// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11628// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
11629// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
11630// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
11631// CHECK:   store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
11632// CHECK:   ret void
// vst1q_p16 must lower to a <8 x i16> store reached via i8*/vector bitcasts
// (pinned by the preceding IR-match lines).
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}
11636
11637// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
11638// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11639// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
11640// CHECK:   ret void
// 64-bit vst1_u8 must lower to a plain <8 x i8> store through the cast
// pointer (pinned by the preceding IR-match lines).
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}
11644
11645// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
11646// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11647// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11648// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11649// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11650// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11651// CHECK:   ret void
// 64-bit vst1_u16 must lower to a <4 x i16> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}
11655
11656// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
11657// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
11658// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
11659// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
11660// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
11661// CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
11662// CHECK:   ret void
// 64-bit vst1_u32 must lower to a <2 x i32> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}
11666
11667// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
11668// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
11669// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
11670// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
11671// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
11672// CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
11673// CHECK:   ret void
// 64-bit vst1_u64 must lower to a <1 x i64> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}
11677
11678// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
11679// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11680// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
11681// CHECK:   ret void
// 64-bit vst1_s8 must lower to a plain <8 x i8> store — same IR as the
// unsigned variant (pinned by the preceding IR-match lines).
void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}
11685
11686// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
11687// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11688// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11689// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11690// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11691// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11692// CHECK:   ret void
// 64-bit vst1_s16 must lower to a <4 x i16> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}
11696
11697// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
11698// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
11699// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
11700// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
11701// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
11702// CHECK:   store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
11703// CHECK:   ret void
// 64-bit vst1_s32 must lower to a <2 x i32> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}
11707
11708// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
11709// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
11710// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
11711// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
11712// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
11713// CHECK:   store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
11714// CHECK:   ret void
// 64-bit vst1_s64 must lower to a <1 x i64> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}
11718
11719// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
11720// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
11721// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
11722// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11723// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11724// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11725// CHECK:   ret void
// 64-bit vst1_f16 must lower to a store of the half vector reinterpreted as
// <4 x i16> (pinned by the preceding IR-match lines).
void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}
11729
11730// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
11731// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
11732// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
11733// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
11734// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
11735// CHECK:   store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
11736// CHECK:   ret void
// 64-bit vst1_f32 must lower to a <2 x float> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}
11740
11741// CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 {
11742// CHECK:   [[TMP0:%.*]] = bitcast double* %a to i8*
11743// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
11744// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
11745// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
11746// CHECK:   store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
11747// CHECK:   ret void
// 64-bit vst1_f64 must lower to a <1 x double> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}
11751
11752// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
11753// CHECK:   [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
11754// CHECK:   store <8 x i8> %b, <8 x i8>* [[TMP0]]
11755// CHECK:   ret void
// 64-bit vst1_p8 must lower to a plain <8 x i8> store — same IR as the
// integer 8-bit variants (pinned by the preceding IR-match lines).
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}
11759
11760// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
11761// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
11762// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
11763// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
11764// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
11765// CHECK:   store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
11766// CHECK:   ret void
// 64-bit vst1_p16 must lower to a <4 x i16> store reached via i8*/vector
// bitcasts (pinned by the preceding IR-match lines).
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}
11770
11771// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
11772// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
11773// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
11774// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
11775// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11776// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
11777// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
11778// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11779// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
11780// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11781// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11782// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
11783// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11784// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11785// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11786// CHECK:   ret void
// vst2q_u8 must lower to llvm.aarch64.neon.st2.v16i8 after the two-vector
// struct argument is unpacked from its byval copy (pinned by the preceding
// IR-match lines).
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
11790
11791// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
11792// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
11793// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
11794// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
11795// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11796// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
11797// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
11798// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11799// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11800// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
11801// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11802// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11803// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11804// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
11805// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11806// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11807// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11808// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11809// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11810// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11811// CHECK:   ret void
// vst2q_u16 must lower to llvm.aarch64.neon.st2.v8i16 after the two-vector
// struct argument is unpacked and round-tripped through <16 x i8> bitcasts
// (pinned by the preceding IR-match lines).
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}
11815
11816// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
11817// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
11818// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
11819// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
11820// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
11821// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
11822// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
11823// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11824// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11825// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
11826// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
11827// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11828// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11829// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
11830// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11831// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11832// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11833// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11834// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11835// CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
11836// CHECK:   ret void
// vst2q_u32 must lower to llvm.aarch64.neon.st2.v4i32 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}
11840
11841// CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
11842// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
11843// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
11844// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
11845// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11846// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
11847// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
11848// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11849// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11850// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
11851// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11852// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11853// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11854// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
11855// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11856// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11857// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11858// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11859// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11860// CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11861// CHECK:   ret void
// vst2q_u64 must lower to llvm.aarch64.neon.st2.v2i64 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
  vst2q_u64(a, b);
}
11865
11866// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
11867// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
11868// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
11869// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
11870// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
11871// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
11872// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
11873// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11874// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
11875// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
11876// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
11877// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
11878// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
11879// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
11880// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
11881// CHECK:   ret void
// vst2q_s8 must lower to llvm.aarch64.neon.st2.v16i8 — same IR as the
// unsigned variant (pinned by the preceding IR-match lines).
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
  vst2q_s8(a, b);
}
11885
11886// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
11887// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
11888// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
11889// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
11890// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
11891// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
11892// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
11893// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11894// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
11895// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
11896// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
11897// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
11898// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11899// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
11900// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
11901// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
11902// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11903// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11904// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11905// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11906// CHECK:   ret void
// vst2q_s16 must lower to llvm.aarch64.neon.st2.v8i16 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
  vst2q_s16(a, b);
}
11910
11911// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
11912// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
11913// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
11914// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
11915// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
11916// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
11917// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
11918// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11919// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
11920// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
11921// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
11922// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
11923// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11924// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
11925// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
11926// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
11927// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11928// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11929// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11930// CHECK:   call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
11931// CHECK:   ret void
// vst2q_s32 must lower to llvm.aarch64.neon.st2.v4i32 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}
11935
11936// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
11937// CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
11938// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
11939// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
11940// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
11941// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
11942// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
11943// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11944// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
11945// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11946// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
11947// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
11948// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11949// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
11950// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
11951// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
11952// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11953// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11954// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11955// CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
11956// CHECK:   ret void
// vst2q_s64 must lower to llvm.aarch64.neon.st2.v2i64 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}
11960
11961// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
11962// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
11963// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
11964// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
11965// CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
11966// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
11967// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
11968// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11969// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
11970// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11971// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
11972// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
11973// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11974// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
11975// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
11976// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
11977// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11978// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11979// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11980// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
11981// CHECK:   ret void
// vst2q_f16 must lower to llvm.aarch64.neon.st2.v8i16 with the half vectors
// reinterpreted as <8 x i16> (pinned by the preceding IR-match lines).
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}
11985
11986// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
11987// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
11988// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
11989// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
11990// CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
11991// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
11992// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
11993// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
11994// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
11995// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
11996// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
11997// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
11998// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11999// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
12000// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
12001// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
12002// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
12003// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
12004// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
12005// CHECK:   call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
12006// CHECK:   ret void
// vst2q_f32 must lower to llvm.aarch64.neon.st2.v4f32 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
12010
12011// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
12012// CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
12013// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
12014// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
12015// CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
12016// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
12017// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
12018// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
12019// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
12020// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
12021// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
12022// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
12023// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12024// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
12025// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
12026// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
12027// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12028// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12029// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12030// CHECK:   call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
12031// CHECK:   ret void
// vst2q_f64 must lower to llvm.aarch64.neon.st2.v2f64 after the two-vector
// struct argument is unpacked (pinned by the preceding IR-match lines).
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}
12035
// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
// Verifies vst2q_p8 lowers to @llvm.aarch64.neon.st2.v16i8 with no
// element-type bitcasts (i8 vectors need none), storing directly through %a.
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
12055
// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2q_p16 lowers to @llvm.aarch64.neon.st2.v8i16, with the
// round-trip <8 x i16> -> <16 x i8> -> <8 x i16> bitcasts the builtin emits.
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
12080
// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
// Verifies the 64-bit (d-register) variant vst2_u8 lowers to
// @llvm.aarch64.neon.st2.v8i8, storing directly through %a without bitcasts.
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
12100
// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_u16 lowers to @llvm.aarch64.neon.st2.v4i16 with the
// <4 x i16> -> <8 x i8> -> <4 x i16> bitcast round-trip.
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
12125
// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_u32 lowers to @llvm.aarch64.neon.st2.v2i32.
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
12150
// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_u64 lowers to @llvm.aarch64.neon.st2.v1i64 (single-element
// 64-bit vectors still go through the st2 intrinsic).
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}
12175
// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
// Verifies vst2_s8 lowers to @llvm.aarch64.neon.st2.v8i8 — identical IR to
// the unsigned variant, since signedness does not affect the store.
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}
12195
// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_s16 lowers to @llvm.aarch64.neon.st2.v4i16.
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}
12220
// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_s32 lowers to @llvm.aarch64.neon.st2.v2i32.
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}
12245
// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_s64 lowers to @llvm.aarch64.neon.st2.v1i64.
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}
12270
// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_f16 lowers to @llvm.aarch64.neon.st2.v4i16: note the half
// vectors are bitcast to <4 x i16> before the store intrinsic per the
// CHECK lines above (f16 data is stored via the integer-typed intrinsic).
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}
12295
// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_f32 lowers to the float-typed @llvm.aarch64.neon.st2.v2f32.
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}
12320
// CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK:   call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies the AArch64-only vst2_f64 lowers to @llvm.aarch64.neon.st2.v1f64.
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}
12345
// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK:   ret void
// Verifies vst2_p8 lowers to @llvm.aarch64.neon.st2.v8i8 (polynomial i8
// vectors take the same path as u8/s8).
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}
12365
// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst2_p16 lowers to @llvm.aarch64.neon.st2.v4i16.
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
12390
// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
// Verifies the three-register variant vst3q_u8 lowers to a single
// @llvm.aarch64.neon.st3.v16i8 call passing all three vectors.
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}
12413
// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_u16 lowers to @llvm.aarch64.neon.st3.v8i16.
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}
12443
// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// Verifies vst3q_u32 lowers to @llvm.aarch64.neon.st3.v4i32.
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}
12473
// CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_u64: arg spilled/copied, three <2 x i64> lanes reloaded and passed to
// @llvm.aarch64.neon.st3.v2i64 for the interleaved store.
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
  vst3q_u64(a, b);
}
12503
// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
// vst3q_s8: i8 element type, so no pointer or lane bitcasts are needed — the
// three <16 x i8> lanes go straight into @llvm.aarch64.neon.st3.v16i8.
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
  vst3q_s8(a, b);
}
12526
// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_s16: arg spilled/copied, three <8 x i16> lanes reloaded (round-tripped
// through <16 x i8>) and passed to @llvm.aarch64.neon.st3.v8i16.
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
  vst3q_s16(a, b);
}
12556
// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_s32: same lowering shape as the u32 variant — three <4 x i32> lanes
// interleaved via @llvm.aarch64.neon.st3.v4i32.
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
  vst3q_s32(a, b);
}
12586
// CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_s64: same lowering shape as the u64 variant — three <2 x i64> lanes
// interleaved via @llvm.aarch64.neon.st3.v2i64.
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
  vst3q_s64(a, b);
}
12616
// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_f16: the <8 x half> lanes are bitcast (via <16 x i8>) to <8 x i16>
// because the store is emitted with the integer @llvm.aarch64.neon.st3.v8i16.
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
  vst3q_f16(a, b);
}
12646
// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_f32: three <4 x float> lanes reloaded (round-tripped through
// <16 x i8>) and interleaved via the float @llvm.aarch64.neon.st3.v4f32.
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
  vst3q_f32(a, b);
}
12676
// CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_f64: three <2 x double> lanes reloaded and interleaved via
// @llvm.aarch64.neon.st3.v2f64.
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
  vst3q_f64(a, b);
}
12706
// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
// vst3q_p8: identical lowering shape to the s8 variant — i8 elements, so no
// bitcasts; three <16 x i8> lanes go to @llvm.aarch64.neon.st3.v16i8.
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}
12729
// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3q_p16: identical lowering shape to the s16/u16 variants — three
// <8 x i16> lanes interleaved via @llvm.aarch64.neon.st3.v8i16.
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}
12759
// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK:   ret void
// vst3_u8 (64-bit "d" form): 8-byte alignment and a 24-byte struct copy; the
// three <8 x i8> lanes go straight to @llvm.aarch64.neon.st3.v8i8.
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}
12782
// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3_u16 (64-bit form): three <4 x i16> lanes reloaded (round-tripped
// through <8 x i8>) and interleaved via @llvm.aarch64.neon.st3.v4i16.
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}
12812
// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK:   ret void
// vst3_u32 (64-bit form): three <2 x i32> lanes reloaded and interleaved via
// @llvm.aarch64.neon.st3.v2i32.
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}
12842
12843// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
12844// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
12845// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
12846// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
12847// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12848// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
12849// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
12850// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12851// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12852// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
12853// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12854// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12855// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12856// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
12857// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12858// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12859// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12860// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
12861// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12862// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12863// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12864// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12865// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12866// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12867// CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12868// CHECK:   ret void
// Tests vst3_u64: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v1i64 on the bitcast pointer.
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}
12872
12873// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
12874// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
12875// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
12876// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
12877// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
12878// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
12879// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
12880// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12881// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
12882// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
12883// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
12884// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
12885// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
12886// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
12887// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
12888// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
12889// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
12890// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
12891// CHECK:   ret void
// Tests vst3_s8: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v8i8 directly on %a (no pointer bitcast
// is needed for the i8* element type).
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}
12895
12896// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
12897// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
12898// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
12899// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
12900// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
12901// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
12902// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
12903// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12904// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
12905// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
12906// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
12907// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
12908// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12909// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
12910// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
12911// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
12912// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12913// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
12914// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
12915// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
12916// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12917// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12918// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12919// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12920// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
12921// CHECK:   ret void
// Tests vst3_s16: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v4i16 on the bitcast pointer.
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}
12925
12926// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
12927// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
12928// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
12929// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
12930// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
12931// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
12932// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
12933// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12934// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
12935// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12936// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
12937// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
12938// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12939// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12940// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
12941// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
12942// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12943// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
12944// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
12945// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
12946// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12947// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12948// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12949// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12950// CHECK:   call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
12951// CHECK:   ret void
// Tests vst3_s32: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v2i32 on the bitcast pointer.
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}
12955
12956// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
12957// CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
12958// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
12959// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
12960// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
12961// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
12962// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
12963// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12964// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
12965// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12966// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
12967// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
12968// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12969// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12970// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
12971// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
12972// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12973// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
12974// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
12975// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
12976// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12977// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12978// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12979// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12980// CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
12981// CHECK:   ret void
// Tests vst3_s64: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v1i64 on the bitcast pointer.
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}
12985
12986// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
12987// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
12988// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
12989// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
12990// CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
12991// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
12992// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
12993// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
12994// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
12995// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
12996// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
12997// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
12998// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12999// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
13000// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
13001// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
13002// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
13003// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
13004// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
13005// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
13006// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
13007// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13008// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13009// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13010// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
13011// CHECK:   ret void
// Tests vst3_f16: per the CHECK lines above, the <4 x half> values are
// reinterpreted as <4 x i16> and codegen is expected to lower this to a call
// to @llvm.aarch64.neon.st3.v4i16 (half lanes stored via their i16 bit
// pattern).
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}
13015
13016// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
13017// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
13018// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
13019// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
13020// CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
13021// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
13022// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
13023// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13024// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
13025// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
13026// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
13027// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
13028// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
13029// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
13030// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
13031// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
13032// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
13033// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
13034// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
13035// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
13036// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
13037// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
13038// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
13039// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
13040// CHECK:   call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
13041// CHECK:   ret void
// Tests vst3_f32: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v2f32 on the bitcast pointer.
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}
13045
13046// CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
13047// CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
13048// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
13049// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
13050// CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
13051// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
13052// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
13053// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13054// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13055// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13056// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
13057// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13058// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13059// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13060// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
13061// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13062// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13063// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
13064// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
13065// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13066// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13067// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13068// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13069// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13070// CHECK:   call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
13071// CHECK:   ret void
// Tests vst3_f64: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v1f64 on the bitcast pointer.
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}
13075
13076// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
13077// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
13078// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
13079// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
13080// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
13081// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
13082// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
13083// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13084// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
13085// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
13086// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13087// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
13088// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13089// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13090// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
13091// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13092// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13093// CHECK:   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
13094// CHECK:   ret void
// Tests vst3_p8: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v8i8 directly on %a (same lowering as
// the u8/s8 variants; polynomial element type only affects C-level typing).
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
13098
13099// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
13100// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
13101// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
13102// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
13103// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
13104// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
13105// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
13106// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
13107// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13108// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
13109// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
13110// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13111// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13112// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
13113// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13114// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13115// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13116// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
13117// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13118// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13119// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13120// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13121// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13122// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13123// CHECK:   call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
13124// CHECK:   ret void
// Tests vst3_p16: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st3.v4i16 on the bitcast pointer (same
// lowering as the u16/s16 variants).
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
13128
13129// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
13130// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
13131// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
13132// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
13133// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
13134// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
13135// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
13136// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13137// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13138// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
13139// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
13140// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13141// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
13142// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
13143// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13144// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
13145// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
13146// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
13147// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
13148// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
13149// CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
13150// CHECK:   ret void
// Tests vst4q_u8: per the CHECK lines above, codegen is expected to lower this
// to a call to @llvm.aarch64.neon.st4.v16i8 directly on %a (128-bit q-form,
// four-register store).
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
13154
13155// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
13156// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
13157// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
13158// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
13159// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
13160// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
13161// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
13162// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13163// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13164// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13165// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
13166// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
13167// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
13168// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13169// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
13170// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
13171// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
13172// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13173// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
13174// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
13175// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
13176// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
13177// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
13178// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
13179// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
13180// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13181// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13182// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13183// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13184// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13185// CHECK:   ret void
// Tests vst4q_u16: per the CHECK lines above, codegen is expected to lower
// this to a call to @llvm.aarch64.neon.st4.v8i16 on the bitcast pointer.
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
13189
13190// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
13191// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
13192// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
13193// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
13194// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
13195// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
13196// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
13197// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13198// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
13199// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13200// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
13201// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
13202// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
13203// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13204// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
13205// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
13206// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
13207// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13208// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
13209// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
13210// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
13211// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
13212// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
13213// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
13214// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
13215// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
13216// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
13217// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
13218// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
13219// CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
13220// CHECK:   ret void
// Tests vst4q_u32: per the CHECK lines above, codegen is expected to lower
// this to a call to @llvm.aarch64.neon.st4.v4i32 on the bitcast pointer.
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}
13224
13225// CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
13226// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
13227// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
13228// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
13229// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13230// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
13231// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
13232// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13233// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13234// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13235// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13236// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13237// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13238// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13239// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13240// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13241// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13242// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13243// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13244// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13245// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13246// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
13247// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13248// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13249// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13250// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13251// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13252// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13253// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13254// CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
13255// CHECK:   ret void
// Codegen driver for vst4q_u64; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v2i64.p0i8.
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
13259
13260// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
13261// CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
13262// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
13263// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
13264// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
13265// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
13266// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
13267// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13268// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13269// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
13270// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
13271// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13272// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
13273// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
13274// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13275// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
13276// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
13277// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
13278// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
13279// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
13280// CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
13281// CHECK:   ret void
// Codegen driver for vst4q_s8; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v16i8.p0i8 (no pointer bitcast
// is needed since the element type is already i8).
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}
13285
13286// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
13287// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
13288// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
13289// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
13290// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
13291// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
13292// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
13293// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13294// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13295// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13296// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
13297// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
13298// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
13299// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13300// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
13301// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
13302// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
13303// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13304// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
13305// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
13306// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
13307// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
13308// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
13309// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
13310// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
13311// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13312// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13313// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13314// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13315// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13316// CHECK:   ret void
// Codegen driver for vst4q_s16; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v8i16.p0i8.
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}
13320
13321// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
13322// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
13323// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
13324// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
13325// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
13326// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
13327// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
13328// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13329// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
13330// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13331// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
13332// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
13333// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
13334// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13335// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
13336// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
13337// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
13338// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13339// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
13340// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
13341// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
13342// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
13343// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
13344// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
13345// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
13346// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
13347// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
13348// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
13349// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
13350// CHECK:   call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
13351// CHECK:   ret void
// Codegen driver for vst4q_s32; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v4i32.p0i8.
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}
13355
13356// CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
13357// CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
13358// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
13359// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
13360// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
13361// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
13362// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
13363// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13364// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13365// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13366// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
13367// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
13368// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
13369// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13370// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
13371// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
13372// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
13373// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13374// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
13375// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
13376// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
13377// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
13378// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
13379// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
13380// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
13381// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
13382// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
13383// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
13384// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
13385// CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
13386// CHECK:   ret void
// Codegen driver for vst4q_s64; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v2i64.p0i8.
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
13390
13391// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
13392// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
13393// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
13394// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
13395// CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
13396// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
13397// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
13398// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13399// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
13400// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13401// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
13402// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
13403// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
13404// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13405// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
13406// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
13407// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
13408// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13409// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
13410// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
13411// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
13412// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
13413// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
13414// CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
13415// CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
13416// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13417// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13418// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13419// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13420// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13421// CHECK:   ret void
// Codegen driver for vst4q_f16; the CHECK lines above pin the expected IR.
// Note the <8 x half> payloads are bitcast to <8 x i16> before the call to
// @llvm.aarch64.neon.st4.v8i16.p0i8 (half vectors are stored as i16 vectors).
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}
13425
13426// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
13427// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
13428// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
13429// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
13430// CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
13431// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
13432// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
13433// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13434// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
13435// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13436// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
13437// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
13438// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
13439// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13440// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
13441// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
13442// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
13443// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13444// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
13445// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
13446// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
13447// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
13448// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
13449// CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
13450// CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
13451// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
13452// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
13453// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
13454// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
13455// CHECK:   call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
13456// CHECK:   ret void
// Codegen driver for vst4q_f32; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v4f32.p0i8.
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}
13460
13461// CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
13462// CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
13463// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
13464// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
13465// CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
13466// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
13467// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
13468// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13469// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13470// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13471// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
13472// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
13473// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
13474// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13475// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
13476// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
13477// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
13478// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13479// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
13480// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
13481// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
13482// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
13483// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
13484// CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
13485// CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
13486// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
13487// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
13488// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
13489// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
13490// CHECK:   call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
13491// CHECK:   ret void
// Codegen driver for vst4q_f64; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v2f64.p0i8.
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}
13495
13496// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
13497// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
13498// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
13499// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
13500// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
13501// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
13502// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
13503// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13504// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13505// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
13506// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
13507// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13508// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
13509// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
13510// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13511// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
13512// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
13513// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
13514// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
13515// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
13516// CHECK:   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
13517// CHECK:   ret void
// Codegen driver for vst4q_p8; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v16i8.p0i8 (no pointer bitcast
// is needed since the element type is already i8).
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}
13521
13522// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
13523// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
13524// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
13525// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
13526// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
13527// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
13528// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
13529// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
13530// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13531// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13532// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
13533// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
13534// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
13535// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13536// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
13537// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
13538// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
13539// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13540// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
13541// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
13542// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
13543// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
13544// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
13545// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
13546// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
13547// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
13548// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
13549// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
13550// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
13551// CHECK:   call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
13552// CHECK:   ret void
// Codegen driver for vst4q_p16; the CHECK lines above pin the expected IR,
// ending in a call to @llvm.aarch64.neon.st4.v8i16.p0i8.
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
13556
13557// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13558// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
13559// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
13560// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
13561// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13562// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
13563// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
13564// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13565// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13566// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13567// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13568// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13569// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13570// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13571// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13572// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13573// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13574// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
13575// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13576// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13577// CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13578// CHECK:   ret void
// Codegen driver for the 64-bit (D-register) variant vst4_u8; the CHECK lines
// above pin the expected IR, ending in a call to @llvm.aarch64.neon.st4.v8i8.p0i8.
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}
13582
13583// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13584// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
13585// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
13586// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
13587// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13588// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
13589// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
13590// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13591// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13592// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13593// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13594// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13595// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13596// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13597// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13598// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13599// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13600// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13601// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13602// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13603// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13604// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
13605// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13606// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13607// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13608// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13609// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13610// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13611// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13612// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13613// CHECK:   ret void
// Codegen driver for the 64-bit (D-register) variant vst4_u16; the CHECK lines
// above pin the expected IR, ending in a call to @llvm.aarch64.neon.st4.v4i16.p0i8.
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}
13617
13618// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
13619// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
13620// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
13621// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
13622// CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
13623// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
13624// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
13625// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13626// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
13627// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13628// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
13629// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
13630// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
13631// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13632// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
13633// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
13634// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
13635// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13636// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
13637// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
13638// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
13639// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
13640// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
13641// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
13642// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
13643// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
13644// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
13645// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
13646// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
13647// CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
13648// CHECK:   ret void
// vst4_u32 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v2i32.p0i8.
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}
13652
13653// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
13654// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
13655// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
13656// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
13657// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13658// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
13659// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
13660// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13661// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13662// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13663// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13664// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13665// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13666// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13667// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13668// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13669// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13670// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13671// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13672// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13673// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13674// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
13675// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13676// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13677// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13678// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13679// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13680// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13681// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13682// CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
13683// CHECK:   ret void
// vst4_u64 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v1i64.p0i8.
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}
13687
13688// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13689// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
13690// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
13691// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
13692// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13693// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
13694// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
13695// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13696// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13697// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13698// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13699// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13700// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13701// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13702// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13703// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13704// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13705// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
13706// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13707// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13708// CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13709// CHECK:   ret void
// vst4_s8 wrapper; <8 x i8> lanes need no bitcast, so the CHECK lines
// above expect a direct call to @llvm.aarch64.neon.st4.v8i8.p0i8 on %a.
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}
13713
13714// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13715// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
13716// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
13717// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
13718// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13719// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
13720// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
13721// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13722// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13723// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13724// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13725// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13726// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13727// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13728// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13729// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13730// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13731// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13732// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13733// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13734// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13735// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
13736// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13737// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13738// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13739// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13740// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13741// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13742// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13743// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13744// CHECK:   ret void
// vst4_s16 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v4i16.p0i8.
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}
13748
13749// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
13750// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
13751// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
13752// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
13753// CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
13754// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
13755// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
13756// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13757// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
13758// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13759// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
13760// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
13761// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
13762// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13763// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
13764// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
13765// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
13766// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13767// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
13768// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
13769// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
13770// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
13771// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
13772// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
13773// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
13774// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
13775// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
13776// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
13777// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
13778// CHECK:   call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
13779// CHECK:   ret void
// vst4_s32 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v2i32.p0i8.
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
  vst4_s32(a, b);
}
13783
13784// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
13785// CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
13786// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
13787// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
13788// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
13789// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
13790// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
13791// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13792// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
13793// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13794// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
13795// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
13796// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
13797// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13798// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
13799// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
13800// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
13801// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13802// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
13803// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
13804// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
13805// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
13806// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
13807// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
13808// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
13809// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
13810// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
13811// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
13812// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
13813// CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
13814// CHECK:   ret void
// vst4_s64 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v1i64.p0i8.
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}
13818
13819// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
13820// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
13821// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
13822// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
13823// CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
13824// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
13825// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
13826// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13827// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
13828// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13829// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
13830// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
13831// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
13832// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13833// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
13834// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
13835// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
13836// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13837// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
13838// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
13839// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
13840// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
13841// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
13842// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
13843// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
13844// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13845// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13846// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13847// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13848// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13849// CHECK:   ret void
// vst4_f16 wrapper; the CHECK lines above show the <4 x half> lanes being
// bitcast to <4 x i16> and stored via @llvm.aarch64.neon.st4.v4i16.p0i8
// (no dedicated half-typed st4 intrinsic is used here).
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}
13853
13854// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
13855// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
13856// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
13857// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
13858// CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
13859// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
13860// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
13861// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13862// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
13863// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13864// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
13865// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
13866// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
13867// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13868// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
13869// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
13870// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
13871// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13872// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
13873// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
13874// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
13875// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
13876// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
13877// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
13878// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
13879// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
13880// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
13881// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
13882// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
13883// CHECK:   call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
13884// CHECK:   ret void
// vst4_f32 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v2f32.p0i8.
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
  vst4_f32(a, b);
}
13888
13889// CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 {
13890// CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
13891// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
13892// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
13893// CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
13894// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
13895// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
13896// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13897// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
13898// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13899// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
13900// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
13901// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
13902// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13903// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
13904// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
13905// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
13906// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13907// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
13908// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
13909// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
13910// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
13911// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
13912// CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
13913// CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
13914// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
13915// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
13916// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
13917// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
13918// CHECK:   call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]])
13919// CHECK:   ret void
// vst4_f64 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v1f64.p0i8.
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
  vst4_f64(a, b);
}
13923
13924// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
13925// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
13926// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
13927// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
13928// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
13929// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
13930// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
13931// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13932// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13933// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
13934// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
13935// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13936// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
13937// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
13938// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13939// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
13940// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
13941// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
13942// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
13943// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
13944// CHECK:   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
13945// CHECK:   ret void
// vst4_p8 wrapper; like the s8 case, <8 x i8> lanes need no bitcast, so the
// CHECK lines above expect a direct @llvm.aarch64.neon.st4.v8i8.p0i8 on %a.
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_p8(a, b);
}
13949
13950// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
13951// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
13952// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
13953// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
13954// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
13955// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
13956// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
13957// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
13958// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
13959// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13960// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
13961// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
13962// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
13963// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13964// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
13965// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
13966// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
13967// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13968// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
13969// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
13970// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
13971// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
13972// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
13973// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
13974// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
13975// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
13976// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
13977// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
13978// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
13979// CHECK:   call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
13980// CHECK:   ret void
// vst4_p16 wrapper; expected lowering (per CHECK lines above) is
// @llvm.aarch64.neon.st4.v4i16.p0i8.
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}
13984
13985// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 {
13986// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
13987// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
13988// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
13989// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
13990// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
13991// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
13992// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
13993// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
13994// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
13995// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
13996// CHECK:   ret %struct.uint8x16x2_t [[TMP4]]
// vld1q_u8_x2 wrapper; the CHECK lines above expect a call to
// @llvm.aarch64.neon.ld1x2.v16i8.p0i8 whose two-vector result is stored
// into the uint8x16x2_t return temporary.
uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
  return vld1q_u8_x2(a);
}
14000
14001// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 {
14002// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
14003// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
14004// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
14005// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14006// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14007// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14008// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14009// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14010// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
14011// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
14012// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14013// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
14014// CHECK:   ret %struct.uint16x8x2_t [[TMP6]]
14015uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
14016  return vld1q_u16_x2(a);
14017}
14018
14019// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 {
14020// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
14021// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
14022// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
14023// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
14024// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14025// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
14026// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
14027// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
14028// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
14029// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
14030// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14031// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
14032// CHECK:   ret %struct.uint32x4x2_t [[TMP6]]
14033uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
14034  return vld1q_u32_x2(a);
14035}
14036
14037// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 {
14038// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
14039// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
14040// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
14041// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14042// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14043// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
14044// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
14045// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
14046// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
14047// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
14048// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14049// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
14050// CHECK:   ret %struct.uint64x2x2_t [[TMP6]]
14051uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
14052  return vld1q_u64_x2(a);
14053}
14054
14055// CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 {
14056// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
14057// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
14058// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
14059// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
14060// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
14061// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
14062// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
14063// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
14064// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
14065// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
14066// CHECK:   ret %struct.int8x16x2_t [[TMP4]]
14067int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
14068  return vld1q_s8_x2(a);
14069}
14070
14071// CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 {
14072// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
14073// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
14074// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
14075// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14076// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14077// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14078// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14079// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14080// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
14081// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
14082// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14083// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
14084// CHECK:   ret %struct.int16x8x2_t [[TMP6]]
14085int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
14086  return vld1q_s16_x2(a);
14087}
14088
14089// CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 {
14090// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
14091// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
14092// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
14093// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
14094// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14095// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
14096// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
14097// CHECK:   store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
14098// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
14099// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
14100// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14101// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
14102// CHECK:   ret %struct.int32x4x2_t [[TMP6]]
14103int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
14104  return vld1q_s32_x2(a);
14105}
14106
14107// CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 {
14108// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
14109// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
14110// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
14111// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14112// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14113// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
14114// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
14115// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
14116// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
14117// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
14118// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14119// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
14120// CHECK:   ret %struct.int64x2x2_t [[TMP6]]
14121int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
14122  return vld1q_s64_x2(a);
14123}
14124
14125// CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 {
14126// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
14127// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
14128// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
14129// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
14130// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14131// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14132// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14133// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14134// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
14135// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
14136// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14137// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
14138// CHECK:   ret %struct.float16x8x2_t [[TMP6]]
14139float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
14140  return vld1q_f16_x2(a);
14141}
14142
14143// CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 {
14144// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
14145// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
14146// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
14147// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
14148// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
14149// CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* [[TMP2]])
14150// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
14151// CHECK:   store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
14152// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
14153// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
14154// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14155// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
14156// CHECK:   ret %struct.float32x4x2_t [[TMP6]]
14157float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
14158  return vld1q_f32_x2(a);
14159}
14160
14161// CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
14162// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
14163// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
14164// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
14165// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
14166// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
14167// CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
14168// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
14169// CHECK:   store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
14170// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
14171// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
14172// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14173// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
14174// CHECK:   ret %struct.float64x2x2_t [[TMP6]]
14175float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
14176  return vld1q_f64_x2(a);
14177}
14178
14179// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
14180// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
14181// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
14182// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
14183// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
14184// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
14185// CHECK:   store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
14186// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
14187// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
14188// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
14189// CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
14190// CHECK:   ret %struct.poly8x16x2_t [[TMP4]]
14191poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
14192  return vld1q_p8_x2(a);
14193}
14194
14195// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
14196// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
14197// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
14198// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
14199// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14200// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14201// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
14202// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
14203// CHECK:   store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
14204// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
14205// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
14206// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14207// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
14208// CHECK:   ret %struct.poly16x8x2_t [[TMP6]]
14209poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
14210  return vld1q_p16_x2(a);
14211}
14212
14213// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
14214// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
14215// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
14216// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
14217// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14218// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14219// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
14220// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
14221// CHECK:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
14222// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
14223// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
14224// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
14225// CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
14226// CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
14227poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
14228  return vld1q_p64_x2(a);
14229}
14230
// Tests for the vld1_<type>_x2 intrinsics: the 64-bit (D-register) variants
// of the group above. Same ld1x2 lowering, but with half-size vectors, 8-byte
// alignment, and a 16-byte memcpy of the two-vector result struct.
// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint8x8x2_t [[TMP4]]
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
  return vld1_u8_x2(a);
}

// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
  return vld1_u16_x2(a);
}

// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
  return vld1_u32_x2(a);
}

// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
  return vld1_u64_x2(a);
}

// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}

// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}

// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK:   store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}

// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}

// As in the Q-register f16 test above, f16 loads are checked as i16 vectors.
// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}

// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK:   store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK:   store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK:   store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK:   store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}
14476
14477// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
14478// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
14479// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
14480// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
14481// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
14482// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
14483// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
14484// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
14485// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
14486// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
14487// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
14488// CHECK:   ret %struct.uint8x16x3_t [[TMP4]]
// Verifies vld1q_u8_x3 lowers to @llvm.aarch64.neon.ld1x3.v16i8 (see CHECK lines above).
uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
  return vld1q_u8_x3(a);
}
14492
14493// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
14494// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
14495// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
14496// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
14497// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14498// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14499// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
14500// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
14501// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14502// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
14503// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
14504// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14505// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
14506// CHECK:   ret %struct.uint16x8x3_t [[TMP6]]
// Verifies vld1q_u16_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i16 (see CHECK lines above).
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
  return vld1q_u16_x3(a);
}
14510
14511// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
14512// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
14513// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
14514// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
14515// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
14516// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14517// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
14518// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
14519// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
14520// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
14521// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
14522// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14523// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
14524// CHECK:   ret %struct.uint32x4x3_t [[TMP6]]
// Verifies vld1q_u32_x3 lowers to @llvm.aarch64.neon.ld1x3.v4i32 (see CHECK lines above).
uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
  return vld1q_u32_x3(a);
}
14528
14529// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 {
14530// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
14531// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
14532// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
14533// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14534// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14535// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
14536// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
14537// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
14538// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
14539// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
14540// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14541// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
14542// CHECK:   ret %struct.uint64x2x3_t [[TMP6]]
// Verifies vld1q_u64_x3 lowers to @llvm.aarch64.neon.ld1x3.v2i64 (see CHECK lines above).
uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
  return vld1q_u64_x3(a);
}
14546
14547// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 {
14548// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
14549// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
14550// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
14551// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
14552// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
14553// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
14554// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
14555// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
14556// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
14557// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
14558// CHECK:   ret %struct.int8x16x3_t [[TMP4]]
// Verifies vld1q_s8_x3 lowers to @llvm.aarch64.neon.ld1x3.v16i8 (see CHECK lines above).
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
  return vld1q_s8_x3(a);
}
14562
14563// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 {
14564// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
14565// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
14566// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
14567// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14568// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14569// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
14570// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
14571// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14572// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
14573// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
14574// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14575// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
14576// CHECK:   ret %struct.int16x8x3_t [[TMP6]]
// Verifies vld1q_s16_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i16 (see CHECK lines above).
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
  return vld1q_s16_x3(a);
}
14580
14581// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 {
14582// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
14583// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
14584// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
14585// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
14586// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14587// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
14588// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
14589// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
14590// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
14591// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
14592// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14593// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
14594// CHECK:   ret %struct.int32x4x3_t [[TMP6]]
// Verifies vld1q_s32_x3 lowers to @llvm.aarch64.neon.ld1x3.v4i32 (see CHECK lines above).
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
  return vld1q_s32_x3(a);
}
14598
14599// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 {
14600// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
14601// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
14602// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
14603// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14604// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14605// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
14606// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
14607// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
14608// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
14609// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
14610// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14611// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
14612// CHECK:   ret %struct.int64x2x3_t [[TMP6]]
// Verifies vld1q_s64_x3 lowers to @llvm.aarch64.neon.ld1x3.v2i64 (see CHECK lines above).
int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
  return vld1q_s64_x3(a);
}
14616
14617// CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 {
14618// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
14619// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
14620// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
14621// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
14622// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14623// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
14624// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
14625// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14626// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
14627// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
14628// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14629// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
14630// CHECK:   ret %struct.float16x8x3_t [[TMP6]]
// Verifies vld1q_f16_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i16; per the CHECK
// lines above, the half pointer is loaded through an i16* cast.
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
  return vld1q_f16_x3(a);
}
14634
14635// CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 {
14636// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
14637// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
14638// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
14639// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
14640// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
14641// CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* [[TMP2]])
14642// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
14643// CHECK:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
14644// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
14645// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
14646// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14647// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
14648// CHECK:   ret %struct.float32x4x3_t [[TMP6]]
// Verifies vld1q_f32_x3 lowers to @llvm.aarch64.neon.ld1x3.v4f32 (see CHECK lines above).
float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
  return vld1q_f32_x3(a);
}
14652
14653// CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 {
14654// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
14655// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
14656// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
14657// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
14658// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
14659// CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
14660// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
14661// CHECK:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
14662// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
14663// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
14664// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14665// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
14666// CHECK:   ret %struct.float64x2x3_t [[TMP6]]
// Verifies vld1q_f64_x3 lowers to @llvm.aarch64.neon.ld1x3.v2f64 (see CHECK lines above).
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}
14670
14671// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 {
14672// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
14673// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
14674// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
14675// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
14676// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
14677// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
14678// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
14679// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
14680// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
14681// CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
14682// CHECK:   ret %struct.poly8x16x3_t [[TMP4]]
// Verifies vld1q_p8_x3 lowers to @llvm.aarch64.neon.ld1x3.v16i8 (see CHECK lines above).
poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
  return vld1q_p8_x3(a);
}
14686
14687// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 {
14688// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
14689// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
14690// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
14691// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14692// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14693// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
14694// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
14695// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14696// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
14697// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
14698// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14699// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
14700// CHECK:   ret %struct.poly16x8x3_t [[TMP6]]
// Verifies vld1q_p16_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i16 (see CHECK lines above).
poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
  return vld1q_p16_x3(a);
}
14704
14705// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 {
14706// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
14707// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
14708// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
14709// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14710// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14711// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
14712// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
14713// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
14714// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
14715// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
14716// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
14717// CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
14718// CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
// Verifies vld1q_p64_x3 lowers to @llvm.aarch64.neon.ld1x3.v2i64 (see CHECK lines above).
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}
14722
14723// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 {
14724// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
14725// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
14726// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
14727// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
14728// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
14729// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
14730// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
14731// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
14732// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
14733// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
14734// CHECK:   ret %struct.uint8x8x3_t [[TMP4]]
// Verifies vld1_u8_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i8 (see CHECK lines above).
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
  return vld1_u8_x3(a);
}
14738
14739// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 {
14740// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
14741// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
14742// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
14743// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14744// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14745// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14746// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14747// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14748// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
14749// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
14750// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14751// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
14752// CHECK:   ret %struct.uint16x4x3_t [[TMP6]]
// Verifies vld1_u16_x3 lowers to @llvm.aarch64.neon.ld1x3.v4i16 (see CHECK lines above).
uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
  return vld1_u16_x3(a);
}
14756
14757// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 {
14758// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
14759// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
14760// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
14761// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
14762// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14763// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
14764// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
14765// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
14766// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
14767// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
14768// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14769// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
14770// CHECK:   ret %struct.uint32x2x3_t [[TMP6]]
// Verifies vld1_u32_x3 lowers to @llvm.aarch64.neon.ld1x3.v2i32 (see CHECK lines above).
uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
  return vld1_u32_x3(a);
}
14774
14775// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 {
14776// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
14777// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
14778// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
14779// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14780// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14781// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
14782// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
14783// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
14784// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
14785// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
14786// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14787// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
14788// CHECK:   ret %struct.uint64x1x3_t [[TMP6]]
// Verifies vld1_u64_x3 lowers to @llvm.aarch64.neon.ld1x3.v1i64 (see CHECK lines above).
uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
  return vld1_u64_x3(a);
}
14792
14793// CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 {
14794// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
14795// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
14796// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
14797// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
14798// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
14799// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
14800// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
14801// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
14802// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
14803// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
14804// CHECK:   ret %struct.int8x8x3_t [[TMP4]]
// Verifies vld1_s8_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i8 (see CHECK lines above).
int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
  return vld1_s8_x3(a);
}
14808
14809// CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 {
14810// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
14811// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
14812// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
14813// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14814// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14815// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14816// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14817// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14818// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
14819// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
14820// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14821// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
14822// CHECK:   ret %struct.int16x4x3_t [[TMP6]]
// Verifies vld1_s16_x3 lowers to @llvm.aarch64.neon.ld1x3.v4i16 (see CHECK lines above).
int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
  return vld1_s16_x3(a);
}
14826
14827// CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 {
14828// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
14829// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
14830// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
14831// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
14832// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
14833// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]])
14834// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
14835// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
14836// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
14837// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
14838// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14839// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
14840// CHECK:   ret %struct.int32x2x3_t [[TMP6]]
// Verifies vld1_s32_x3 lowers to @llvm.aarch64.neon.ld1x3.v2i32 (see CHECK lines above).
int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
  return vld1_s32_x3(a);
}
14844
14845// CHECK-LABEL: define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 {
14846// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
14847// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
14848// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
14849// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14850// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14851// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
14852// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
14853// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
14854// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
14855// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
14856// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14857// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
14858// CHECK:   ret %struct.int64x1x3_t [[TMP6]]
// Verifies vld1_s64_x3 lowers to @llvm.aarch64.neon.ld1x3.v1i64 (see CHECK lines above).
int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
  return vld1_s64_x3(a);
}
14862
14863// CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 {
14864// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
14865// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
14866// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
14867// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
14868// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14869// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14870// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14871// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14872// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
14873// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
14874// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14875// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
14876// CHECK:   ret %struct.float16x4x3_t [[TMP6]]
// Verifies vld1_f16_x3 lowers to @llvm.aarch64.neon.ld1x3.v4i16; per the CHECK
// lines above, the half pointer is loaded through an i16* cast.
float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
  return vld1_f16_x3(a);
}
14880
14881// CHECK-LABEL: define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) #0 {
14882// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
14883// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
14884// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
14885// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
14886// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
14887// CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* [[TMP2]])
14888// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
14889// CHECK:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
14890// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
14891// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
14892// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14893// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
14894// CHECK:   ret %struct.float32x2x3_t [[TMP6]]
// Verifies vld1_f32_x3 lowers to @llvm.aarch64.neon.ld1x3.v2f32 (see CHECK lines above).
float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
  return vld1_f32_x3(a);
}
14898
14899// CHECK-LABEL: define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) #0 {
14900// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
14901// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
14902// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
14903// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
14904// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
14905// CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]])
14906// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
14907// CHECK:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
14908// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
14909// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
14910// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14911// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
14912// CHECK:   ret %struct.float64x1x3_t [[TMP6]]
// Verifies vld1_f64_x3 lowers to @llvm.aarch64.neon.ld1x3.v1f64 (see CHECK lines above).
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
  return vld1_f64_x3(a);
}
14916
14917// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld1_p8_x3(i8* %a) #0 {
14918// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
14919// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
14920// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
14921// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
14922// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
14923// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
14924// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
14925// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
14926// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
14927// CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
14928// CHECK:   ret %struct.poly8x8x3_t [[TMP4]]
// Verify vld1_p8_x3 lowers to @llvm.aarch64.neon.ld1x3.v8i8 (checked by the CHECK lines above).
poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
  return vld1_p8_x3(a);
}
14932
14933// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld1_p16_x3(i16* %a) #0 {
14934// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
14935// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
14936// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
14937// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14938// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14939// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]])
14940// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
14941// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
14942// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
14943// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
14944// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14945// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
14946// CHECK:   ret %struct.poly16x4x3_t [[TMP6]]
// Verify vld1_p16_x3 lowers to @llvm.aarch64.neon.ld1x3.v4i16 (checked by the CHECK lines above).
poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
  return vld1_p16_x3(a);
}
14950
14951// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld1_p64_x3(i64* %a) #0 {
14952// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
14953// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
14954// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
14955// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
14956// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
14957// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]])
14958// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
14959// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
14960// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
14961// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
14962// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
14963// CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
14964// CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
// Verify vld1_p64_x3 lowers to @llvm.aarch64.neon.ld1x3.v1i64 (checked by the CHECK lines above).
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
  return vld1_p64_x3(a);
}
14968
14969// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) #0 {
14970// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
14971// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
14972// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
14973// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
14974// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
14975// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
14976// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
14977// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
14978// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
14979// CHECK:   [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
14980// CHECK:   ret %struct.uint8x16x4_t [[TMP4]]
// Verify vld1q_u8_x4 lowers to @llvm.aarch64.neon.ld1x4.v16i8 (checked by the CHECK lines above).
uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
  return vld1q_u8_x4(a);
}
14984
14985// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) #0 {
14986// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
14987// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
14988// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
14989// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
14990// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
14991// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
14992// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
14993// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
14994// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
14995// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
14996// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
14997// CHECK:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
14998// CHECK:   ret %struct.uint16x8x4_t [[TMP6]]
// Verify vld1q_u16_x4 lowers to @llvm.aarch64.neon.ld1x4.v8i16 (checked by the CHECK lines above).
uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
  return vld1q_u16_x4(a);
}
15002
15003// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) #0 {
15004// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
15005// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
15006// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
15007// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
15008// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15009// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]])
15010// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
15011// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
15012// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8*
15013// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
15014// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15015// CHECK:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16
15016// CHECK:   ret %struct.uint32x4x4_t [[TMP6]]
// Verify vld1q_u32_x4 lowers to @llvm.aarch64.neon.ld1x4.v4i32 (checked by the CHECK lines above).
uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
  return vld1q_u32_x4(a);
}
15020
15021// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) #0 {
15022// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
15023// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
15024// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
15025// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15026// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15027// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
15028// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
15029// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
15030// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8*
15031// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
15032// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15033// CHECK:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16
15034// CHECK:   ret %struct.uint64x2x4_t [[TMP6]]
// Verify vld1q_u64_x4 lowers to @llvm.aarch64.neon.ld1x4.v2i64 (checked by the CHECK lines above).
uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
  return vld1q_u64_x4(a);
}
15038
15039// CHECK-LABEL: define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) #0 {
15040// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
15041// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
15042// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
15043// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
15044// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
15045// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
15046// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8*
15047// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
15048// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
15049// CHECK:   [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16
15050// CHECK:   ret %struct.int8x16x4_t [[TMP4]]
// Verify vld1q_s8_x4 lowers to @llvm.aarch64.neon.ld1x4.v16i8 (checked by the CHECK lines above).
int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
  return vld1q_s8_x4(a);
}
15054
15055// CHECK-LABEL: define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) #0 {
15056// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
15057// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
15058// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
15059// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
15060// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15061// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
15062// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
15063// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
15064// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8*
15065// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
15066// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15067// CHECK:   [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16
15068// CHECK:   ret %struct.int16x8x4_t [[TMP6]]
// Verify vld1q_s16_x4 lowers to @llvm.aarch64.neon.ld1x4.v8i16 (checked by the CHECK lines above).
int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
  return vld1q_s16_x4(a);
}
15072
15073// CHECK-LABEL: define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) #0 {
15074// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
15075// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
15076// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
15077// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
15078// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15079// CHECK:   [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]])
15080// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
15081// CHECK:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
15082// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8*
15083// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
15084// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15085// CHECK:   [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16
15086// CHECK:   ret %struct.int32x4x4_t [[TMP6]]
// Verify vld1q_s32_x4 lowers to @llvm.aarch64.neon.ld1x4.v4i32 (checked by the CHECK lines above).
int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
  return vld1q_s32_x4(a);
}
15090
15091// CHECK-LABEL: define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) #0 {
15092// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
15093// CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
15094// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
15095// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15096// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15097// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
15098// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
15099// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
15100// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8*
15101// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
15102// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15103// CHECK:   [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16
15104// CHECK:   ret %struct.int64x2x4_t [[TMP6]]
// Verify vld1q_s64_x4 lowers to @llvm.aarch64.neon.ld1x4.v2i64 (checked by the CHECK lines above).
int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
  return vld1q_s64_x4(a);
}
15108
15109// CHECK-LABEL: define %struct.float16x8x4_t @test_vld1q_f16_x4(half* %a) #0 {
15110// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
15111// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
15112// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
15113// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
15114// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15115// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
15116// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
15117// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
15118// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8*
15119// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
15120// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15121// CHECK:   [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16
15122// CHECK:   ret %struct.float16x8x4_t [[TMP6]]
// Verify vld1q_f16_x4 lowers to @llvm.aarch64.neon.ld1x4.v8i16 — per the CHECK lines above,
// the half pointer is bitcast to i16* and the load is done on i16 elements.
float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
  return vld1q_f16_x4(a);
}
15126
15127// CHECK-LABEL: define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) #0 {
15128// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
15129// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
15130// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
15131// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
15132// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
15133// CHECK:   [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* [[TMP2]])
15134// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
15135// CHECK:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
15136// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8*
15137// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
15138// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15139// CHECK:   [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16
15140// CHECK:   ret %struct.float32x4x4_t [[TMP6]]
// Verify vld1q_f32_x4 lowers to @llvm.aarch64.neon.ld1x4.v4f32 (checked by the CHECK lines above).
float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
  return vld1q_f32_x4(a);
}
15144
15145// CHECK-LABEL: define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) #0 {
15146// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
15147// CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
15148// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
15149// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
15150// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
15151// CHECK:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]])
15152// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }*
15153// CHECK:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
15154// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8*
15155// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8*
15156// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15157// CHECK:   [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16
15158// CHECK:   ret %struct.float64x2x4_t [[TMP6]]
// Verify vld1q_f64_x4 lowers to @llvm.aarch64.neon.ld1x4.v2f64 (checked by the CHECK lines above).
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
  return vld1q_f64_x4(a);
}
15162
15163// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld1q_p8_x4(i8* %a) #0 {
15164// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
15165// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
15166// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
15167// CHECK:   [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a)
15168// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
15169// CHECK:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
15170// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8*
15171// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
15172// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false)
15173// CHECK:   [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16
15174// CHECK:   ret %struct.poly8x16x4_t [[TMP4]]
// Verify vld1q_p8_x4 lowers to @llvm.aarch64.neon.ld1x4.v16i8 (checked by the CHECK lines above).
poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
  return vld1q_p8_x4(a);
}
15178
15179// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld1q_p16_x4(i16* %a) #0 {
15180// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
15181// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
15182// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
15183// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
15184// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15185// CHECK:   [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]])
15186// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
15187// CHECK:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
15188// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
15189// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
15190// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15191// CHECK:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
15192// CHECK:   ret %struct.poly16x8x4_t [[TMP6]]
// Verify vld1q_p16_x4 lowers to @llvm.aarch64.neon.ld1x4.v8i16 (checked by the CHECK lines above).
poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
  return vld1q_p16_x4(a);
}
15196
15197// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld1q_p64_x4(i64* %a) #0 {
15198// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
15199// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
15200// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
15201// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15202// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15203// CHECK:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]])
15204// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
15205// CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
15206// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
15207// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
15208// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
15209// CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
15210// CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
// Verify vld1q_p64_x4 lowers to @llvm.aarch64.neon.ld1x4.v2i64 (checked by the CHECK lines above).
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
  return vld1q_p64_x4(a);
}
15214
15215// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) #0 {
15216// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
15217// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
15218// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
15219// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15220// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15221// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15222// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8*
15223// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
15224// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15225// CHECK:   [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
15226// CHECK:   ret %struct.uint8x8x4_t [[TMP4]]
// Verify vld1_u8_x4 lowers to @llvm.aarch64.neon.ld1x4.v8i8 (checked by the CHECK lines above).
uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
  return vld1_u8_x4(a);
}
15230
15231// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) #0 {
15232// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
15233// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
15234// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
15235// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
15236// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15237// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15238// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15239// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15240// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
15241// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
15242// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15243// CHECK:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
15244// CHECK:   ret %struct.uint16x4x4_t [[TMP6]]
// Verify vld1_u16_x4 lowers to @llvm.aarch64.neon.ld1x4.v4i16 (checked by the CHECK lines above).
uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
  return vld1_u16_x4(a);
}
15248
15249// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) #0 {
15250// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
15251// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
15252// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
15253// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
15254// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15255// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]])
15256// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
15257// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
15258// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
15259// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
15260// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15261// CHECK:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
15262// CHECK:   ret %struct.uint32x2x4_t [[TMP6]]
// Verify vld1_u32_x4 lowers to @llvm.aarch64.neon.ld1x4.v2i32 (checked by the CHECK lines above).
uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
  return vld1_u32_x4(a);
}
15266
15267// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) #0 {
15268// CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
15269// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
15270// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
15271// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15272// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15273// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15274// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15275// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15276// CHECK:   [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
15277// CHECK:   [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
15278// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15279// CHECK:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
15280// CHECK:   ret %struct.uint64x1x4_t [[TMP6]]
// Verify vld1_u64_x4 lowers to @llvm.aarch64.neon.ld1x4.v1i64 (checked by the CHECK lines above).
uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
  return vld1_u64_x4(a);
}
15284
15285// CHECK-LABEL: define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) #0 {
15286// CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
15287// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
15288// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
15289// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15290// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15291// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15292// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8*
15293// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
15294// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15295// CHECK:   [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
15296// CHECK:   ret %struct.int8x8x4_t [[TMP4]]
// Verify vld1_s8_x4 lowers to @llvm.aarch64.neon.ld1x4.v8i8 (checked by the CHECK lines above).
int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
  return vld1_s8_x4(a);
}
15300
15301// CHECK-LABEL: define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) #0 {
15302// CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
15303// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
15304// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
15305// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
15306// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15307// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15308// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15309// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15310// CHECK:   [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
15311// CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
15312// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15313// CHECK:   [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
15314// CHECK:   ret %struct.int16x4x4_t [[TMP6]]
// Verify vld1_s16_x4 lowers to @llvm.aarch64.neon.ld1x4.v4i16 (checked by the CHECK lines above).
int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
  return vld1_s16_x4(a);
}
15318
15319// CHECK-LABEL: define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) #0 {
15320// CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
15321// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
15322// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
15323// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
15324// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
15325// CHECK:   [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]])
15326// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
15327// CHECK:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
15328// CHECK:   [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
15329// CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
15330// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15331// CHECK:   [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
15332// CHECK:   ret %struct.int32x2x4_t [[TMP6]]
// Verify vld1_s32_x4 lowers to @llvm.aarch64.neon.ld1x4.v2i32 (checked by the CHECK lines above).
int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
  return vld1_s32_x4(a);
}
15336
15337// CHECK-LABEL: define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) #0 {
15338// CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
15339// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
15340// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
15341// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15342// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15343// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15344// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15345// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15346// CHECK:   [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
15347// CHECK:   [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
15348// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15349// CHECK:   [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
15350// CHECK:   ret %struct.int64x1x4_t [[TMP6]]
// Verify vld1_s64_x4 lowers to @llvm.aarch64.neon.ld1x4.v1i64 (checked by the CHECK lines above).
int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
  return vld1_s64_x4(a);
}
15354
15355// CHECK-LABEL: define %struct.float16x4x4_t @test_vld1_f16_x4(half* %a) #0 {
15356// CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
15357// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
15358// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
15359// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
15360// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15361// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15362// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15363// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15364// CHECK:   [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
15365// CHECK:   [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
15366// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15367// CHECK:   [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
15368// CHECK:   ret %struct.float16x4x4_t [[TMP6]]
// Per the CHECK lines above: half data is loaded as i16 lanes via
// @llvm.aarch64.neon.ld1x4.v4i16 (legacy fp16-as-i16 lowering).
float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
  return vld1_f16_x4(a);
}
15372
15373// CHECK-LABEL: define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) #0 {
15374// CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
15375// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
15376// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
15377// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
15378// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
15379// CHECK:   [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* [[TMP2]])
15380// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
15381// CHECK:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
15382// CHECK:   [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
15383// CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
15384// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15385// CHECK:   [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
15386// CHECK:   ret %struct.float32x2x4_t [[TMP6]]
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.ld1x4.v2f32.
float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
  return vld1_f32_x4(a);
}
15390
15391// CHECK-LABEL: define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) #0 {
15392// CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
15393// CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
15394// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
15395// CHECK:   [[TMP1:%.*]] = bitcast double* %a to i8*
15396// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
15397// CHECK:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]])
15398// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
15399// CHECK:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
15400// CHECK:   [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
15401// CHECK:   [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
15402// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15403// CHECK:   [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
15404// CHECK:   ret %struct.float64x1x4_t [[TMP6]]
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.ld1x4.v1f64.
float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
  return vld1_f64_x4(a);
}
15408
15409// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld1_p8_x4(i8* %a) #0 {
15410// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
15411// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
15412// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
15413// CHECK:   [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a)
15414// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
15415// CHECK:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
15416// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
15417// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
15418// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false)
15419// CHECK:   [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
15420// CHECK:   ret %struct.poly8x8x4_t [[TMP4]]
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.ld1x4.v8i8; the
// i8* argument needs no pointer bitcasts, unlike the wider-element variants.
poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
  return vld1_p8_x4(a);
}
15424
15425// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld1_p16_x4(i16* %a) #0 {
15426// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
15427// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
15428// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
15429// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
15430// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
15431// CHECK:   [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]])
15432// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
15433// CHECK:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
15434// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
15435// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
15436// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15437// CHECK:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
15438// CHECK:   ret %struct.poly16x4x4_t [[TMP6]]
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.ld1x4.v4i16.
poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
  return vld1_p16_x4(a);
}
15442
15443// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld1_p64_x4(i64* %a) #0 {
15444// CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
15445// CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
15446// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
15447// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
15448// CHECK:   [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
15449// CHECK:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]])
15450// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
15451// CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
15452// CHECK:   [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
15453// CHECK:   [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
15454// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
15455// CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
15456// CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
// Per the CHECK lines above: poly64 shares the v1i64 lowering,
// @llvm.aarch64.neon.ld1x4.v1i64.
poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
  return vld1_p64_x4(a);
}
15460
15461// CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
15462// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
15463// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
15464// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
15465// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
15466// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
15467// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
15468// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15469// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
15470// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
15471// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15472// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
15473// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
15474// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15475// CHECK:   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
15476// CHECK:   ret void
// Per the CHECK lines above: both <16 x i8> halves of the pair are loaded from
// the byval copy and passed to @llvm.aarch64.neon.st1x2.v16i8.
void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) {
  vst1q_u8_x2(a, b);
}
15480
15481// CHECK-LABEL: define void @test_vst1q_u16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
15482// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
15483// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
15484// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
15485// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
15486// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
15487// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
15488// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15489// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15490// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
15491// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
15492// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15493// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
15494// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
15495// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
15496// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15497// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
15498// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15499// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15500// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15501// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15502// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v8i16 with
// the round-trip <8 x i16> -> <16 x i8> -> <8 x i16> bitcasts clang emits.
void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) {
  vst1q_u16_x2(a, b);
}
15506
15507// CHECK-LABEL: define void @test_vst1q_u32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
15508// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
15509// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
15510// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
15511// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
15512// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
15513// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
15514// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15515// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
15516// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
15517// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
15518// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
15519// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
15520// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
15521// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
15522// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
15523// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
15524// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
15525// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
15526// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15527// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
15528// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v4i32.
void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) {
  vst1q_u32_x2(a, b);
}
15532
15533// CHECK-LABEL: define void @test_vst1q_u64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
15534// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
15535// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
15536// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
15537// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
15538// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
15539// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
15540// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15541// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15542// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
15543// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
15544// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
15545// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
15546// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
15547// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
15548// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
15549// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
15550// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
15551// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
15552// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15553// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
15554// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v2i64.
void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) {
  vst1q_u64_x2(a, b);
}
15558
15559// CHECK-LABEL: define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
15560// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
15561// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
15562// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
15563// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
15564// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
15565// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
15566// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15567// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
15568// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
15569// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15570// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
15571// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
15572// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15573// CHECK:   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
15574// CHECK:   ret void
// Per the CHECK lines above: signed variant shares the unsigned lowering,
// @llvm.aarch64.neon.st1x2.v16i8.
void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
  vst1q_s8_x2(a, b);
}
15578
15579// CHECK-LABEL: define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
15580// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
15581// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
15582// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
15583// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
15584// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
15585// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
15586// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15587// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15588// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
15589// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
15590// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15591// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
15592// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
15593// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
15594// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15595// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
15596// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15597// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15598// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15599// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15600// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v8i16.
void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) {
  vst1q_s16_x2(a, b);
}
15604
15605// CHECK-LABEL: define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
15606// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
15607// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
15608// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
15609// CHECK:   store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
15610// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
15611// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
15612// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15613// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
15614// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
15615// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
15616// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
15617// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
15618// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
15619// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
15620// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
15621// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
15622// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
15623// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
15624// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15625// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
15626// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v4i32.
void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
  vst1q_s32_x2(a, b);
}
15630
15631// CHECK-LABEL: define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
15632// CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
15633// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
15634// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
15635// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
15636// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
15637// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
15638// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15639// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15640// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
15641// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
15642// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
15643// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
15644// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
15645// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
15646// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
15647// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
15648// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
15649// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
15650// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15651// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
15652// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v2i64.
void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
  vst1q_s64_x2(a, b);
}
15656
15657// CHECK-LABEL: define void @test_vst1q_f16_x2(half* %a, [2 x <8 x half>] %b.coerce) #0 {
15658// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
15659// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
15660// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
15661// CHECK:   store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
15662// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
15663// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
15664// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15665// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
15666// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
15667// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
15668// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
15669// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
15670// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
15671// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
15672// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
15673// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
15674// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15675// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15676// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15677// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15678// CHECK:   ret void
// Per the CHECK lines above: half vectors are bitcast to <8 x i16> and stored
// via @llvm.aarch64.neon.st1x2.v8i16 (legacy fp16-as-i16 lowering).
void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
  vst1q_f16_x2(a, b);
}
15682
15683// CHECK-LABEL: define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b.coerce) #0 {
15684// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
15685// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
15686// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
15687// CHECK:   store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
15688// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
15689// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
15690// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15691// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
15692// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
15693// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
15694// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
15695// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
15696// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
15697// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
15698// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
15699// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
15700// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
15701// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
15702// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
15703// CHECK:   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], float* [[TMP9]])
15704// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v4f32.
void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) {
  vst1q_f32_x2(a, b);
}
15708
15709// CHECK-LABEL: define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b.coerce) #0 {
15710// CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
15711// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
15712// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
15713// CHECK:   store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
15714// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
15715// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
15716// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15717// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
15718// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
15719// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
15720// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
15721// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
15722// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
15723// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
15724// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
15725// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
15726// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
15727// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
15728// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
15729// CHECK:   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
15730// CHECK:   ret void
// Per the CHECK lines above: lowers to @llvm.aarch64.neon.st1x2.v2f64.
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}
15734
15735// CHECK-LABEL: define void @test_vst1q_p8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
15736// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
15737// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
15738// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
15739// CHECK:   store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
15740// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
15741// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
15742// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15743// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
15744// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
15745// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
15746// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
15747// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
15748// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
15749// CHECK:   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
15750// CHECK:   ret void
// Per the CHECK lines above: poly8 shares the v16i8 lowering,
// @llvm.aarch64.neon.st1x2.v16i8, with no pointer bitcasts needed.
void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) {
  vst1q_p8_x2(a, b);
}
15754
15755// CHECK-LABEL: define void @test_vst1q_p16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
15756// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
15757// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
15758// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
15759// CHECK:   store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
15760// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
15761// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
15762// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15763// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15764// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
15765// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
15766// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
15767// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
15768// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
15769// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
15770// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
15771// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
15772// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
15773// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
15774// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15775// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
15776// CHECK:   ret void
// Per the CHECK lines above: poly16 shares the v8i16 lowering,
// @llvm.aarch64.neon.st1x2.v8i16.
void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) {
  vst1q_p16_x2(a, b);
}
15780
15781// CHECK-LABEL: define void @test_vst1q_p64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
15782// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
15783// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
15784// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
15785// CHECK:   store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
15786// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
15787// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
15788// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
15789// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15790// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
15791// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
15792// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
15793// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
15794// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
15795// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
15796// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
15797// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
15798// CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
15799// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
15800// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15801// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
15802// CHECK:   ret void
// Verify vst1q_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v2i64 (two <2 x i64>
// values stored through 'a'), per the CHECK lines above.
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}
15806
15807// CHECK-LABEL: define void @test_vst1_u8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
15808// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
15809// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
15810// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
15811// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
15812// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
15813// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
15814// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15815// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
15816// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
15817// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
15818// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
15819// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
15820// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
15821// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
15822// CHECK:   ret void
// Verify vst1_u8_x2 lowers to @llvm.aarch64.neon.st1x2.v8i8 (two <8 x i8>
// values stored directly through 'a' — no pointer bitcast needed for i8*),
// per the CHECK lines above.
void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) {
  vst1_u8_x2(a, b);
}
15826
15827// CHECK-LABEL: define void @test_vst1_u16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
15828// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
15829// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
15830// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
15831// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
15832// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
15833// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
15834// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15835// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15836// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
15837// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
15838// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
15839// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
15840// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
15841// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
15842// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
15843// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
15844// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
15845// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
15846// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15847// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
15848// CHECK:   ret void
// Verify vst1_u16_x2 lowers to @llvm.aarch64.neon.st1x2.v4i16 (two <4 x i16>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) {
  vst1_u16_x2(a, b);
}
15852
15853// CHECK-LABEL: define void @test_vst1_u32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
15854// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
15855// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
15856// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
15857// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
15858// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
15859// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
15860// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15861// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
15862// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
15863// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
15864// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
15865// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
15866// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
15867// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
15868// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
15869// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
15870// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
15871// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
15872// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15873// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
15874// CHECK:   ret void
// Verify vst1_u32_x2 lowers to @llvm.aarch64.neon.st1x2.v2i32 (two <2 x i32>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) {
  vst1_u32_x2(a, b);
}
15878
15879// CHECK-LABEL: define void @test_vst1_u64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
15880// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
15881// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
15882// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
15883// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
15884// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
15885// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
15886// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15887// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15888// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
15889// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
15890// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
15891// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
15892// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
15893// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
15894// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
15895// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
15896// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
15897// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
15898// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15899// CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
15900// CHECK:   ret void
// Verify vst1_u64_x2 lowers to @llvm.aarch64.neon.st1x2.v1i64 (two <1 x i64>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) {
  vst1_u64_x2(a, b);
}
15904
15905// CHECK-LABEL: define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
15906// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
15907// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
15908// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
15909// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
15910// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
15911// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
15912// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15913// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
15914// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
15915// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
15916// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
15917// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
15918// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
15919// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
15920// CHECK:   ret void
// Verify vst1_s8_x2 lowers to @llvm.aarch64.neon.st1x2.v8i8 — same IR as the
// unsigned variant, since sign only affects the C-level type, per the CHECK
// lines above.
void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) {
  vst1_s8_x2(a, b);
}
15924
15925// CHECK-LABEL: define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
15926// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
15927// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
15928// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
15929// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
15930// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
15931// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
15932// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15933// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
15934// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
15935// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
15936// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
15937// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
15938// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
15939// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
15940// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
15941// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
15942// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
15943// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
15944// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
15945// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
15946// CHECK:   ret void
// Verify vst1_s16_x2 lowers to @llvm.aarch64.neon.st1x2.v4i16 (two <4 x i16>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) {
  vst1_s16_x2(a, b);
}
15950
15951// CHECK-LABEL: define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
15952// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
15953// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
15954// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
15955// CHECK:   store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
15956// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
15957// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
15958// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15959// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
15960// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
15961// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
15962// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
15963// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
15964// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
15965// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
15966// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
15967// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
15968// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
15969// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
15970// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
15971// CHECK:   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
15972// CHECK:   ret void
// Verify vst1_s32_x2 lowers to @llvm.aarch64.neon.st1x2.v2i32 (two <2 x i32>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
  vst1_s32_x2(a, b);
}
15976
15977// CHECK-LABEL: define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
15978// CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
15979// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
15980// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
15981// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
15982// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
15983// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
15984// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
15985// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
15986// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
15987// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
15988// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
15989// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
15990// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
15991// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
15992// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
15993// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
15994// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
15995// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
15996// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
15997// CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
15998// CHECK:   ret void
// Verify vst1_s64_x2 lowers to @llvm.aarch64.neon.st1x2.v1i64 (two <1 x i64>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
  vst1_s64_x2(a, b);
}
16002
16003// CHECK-LABEL: define void @test_vst1_f16_x2(half* %a, [2 x <4 x half>] %b.coerce) #0 {
16004// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16005// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16006// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
16007// CHECK:   store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
16008// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
16009// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
16010// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
16011// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
16012// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16013// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
16014// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16015// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
16016// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
16017// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
16018// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16019// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
16020// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16021// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16022// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
16023// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
16024// CHECK:   ret void
// Verify vst1_f16_x2 lowers to @llvm.aarch64.neon.st1x2.v4i16: the half
// vectors are bitcast to <4 x i16> before the store, per the CHECK lines
// above (fp16 lacks a dedicated st1x2 overload here).
void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
  vst1_f16_x2(a, b);
}
16028
16029// CHECK-LABEL: define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b.coerce) #0 {
16030// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
16031// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
16032// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
16033// CHECK:   store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
16034// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
16035// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
16036// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
16037// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
16038// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16039// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
16040// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16041// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
16042// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
16043// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
16044// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16045// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
16046// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
16047// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
16048// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
16049// CHECK:   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], float* [[TMP9]])
16050// CHECK:   ret void
// Verify vst1_f32_x2 lowers to @llvm.aarch64.neon.st1x2.v2f32 (two
// <2 x float> values stored through 'a'), per the CHECK lines above.
void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) {
  vst1_f32_x2(a, b);
}
16054
16055// CHECK-LABEL: define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b.coerce) #0 {
16056// CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
16057// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
16058// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
16059// CHECK:   store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
16060// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
16061// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
16062// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
16063// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
16064// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
16065// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
16066// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
16067// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
16068// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
16069// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
16070// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
16071// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
16072// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
16073// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
16074// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
16075// CHECK:   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
16076// CHECK:   ret void
// Verify vst1_f64_x2 lowers to @llvm.aarch64.neon.st1x2.v1f64 (two
// <1 x double> values stored through 'a'), per the CHECK lines above.
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}
16080
16081// CHECK-LABEL: define void @test_vst1_p8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
16082// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
16083// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
16084// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
16085// CHECK:   store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
16086// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
16087// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
16088// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
16089// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16090// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
16091// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16092// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
16093// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16094// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16095// CHECK:   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
16096// CHECK:   ret void
// Verify vst1_p8_x2 lowers to @llvm.aarch64.neon.st1x2.v8i8 (two <8 x i8>
// values stored directly through 'a'), per the CHECK lines above.
void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) {
  vst1_p8_x2(a, b);
}
16100
16101// CHECK-LABEL: define void @test_vst1_p16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
16102// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
16103// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
16104// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
16105// CHECK:   store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
16106// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
16107// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
16108// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
16109// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16110// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16111// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
16112// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16113// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16114// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
16115// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16116// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16117// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16118// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16119// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16120// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
16121// CHECK:   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
16122// CHECK:   ret void
// Verify vst1_p16_x2 lowers to @llvm.aarch64.neon.st1x2.v4i16 (two <4 x i16>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
  vst1_p16_x2(a, b);
}
16126
16127// CHECK-LABEL: define void @test_vst1_p64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
16128// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
16129// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
16130// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
16131// CHECK:   store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
16132// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
16133// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
16134// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
16135// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16136// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
16137// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
16138// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16139// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16140// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
16141// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16142// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16143// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16144// CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16145// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16146// CHECK:   [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
16147// CHECK:   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
16148// CHECK:   ret void
// Verify vst1_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v1i64 (two <1 x i64>
// values stored through 'a'), per the CHECK lines above.
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}
16152
16153// CHECK-LABEL: define void @test_vst1q_u8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
16154// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
16155// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
16156// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
16157// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
16158// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
16159// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
16160// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16161// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16162// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
16163// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16164// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16165// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
16166// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16167// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16168// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
16169// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16170// CHECK:   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
16171// CHECK:   ret void
// Verify vst1q_u8_x3 lowers to @llvm.aarch64.neon.st1x3.v16i8 (three
// <16 x i8> values stored directly through 'a'), per the CHECK lines above.
void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) {
  vst1q_u8_x3(a, b);
}
16175
16176// CHECK-LABEL: define void @test_vst1q_u16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
16177// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
16178// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
16179// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
16180// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
16181// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
16182// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
16183// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16184// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16185// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
16186// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
16187// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16188// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
16189// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
16190// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
16191// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16192// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
16193// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
16194// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
16195// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
16196// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
16197// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
16198// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
16199// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
16200// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16201// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
16202// CHECK:   ret void
// Exercises the 3-register u16 store; per the directives above, the three
// <8 x i16> lanes round-trip through <16 x i8> bitcasts before the
// @llvm.aarch64.neon.st1x3.v8i16 call.
void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) {
  vst1q_u16_x3(a, b);
}
16206
16207// CHECK-LABEL: define void @test_vst1q_u32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
16208// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
16209// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
16210// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
16211// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
16212// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
16213// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
16214// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16215// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
16216// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
16217// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
16218// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16219// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
16220// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
16221// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
16222// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16223// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
16224// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
16225// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
16226// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
16227// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
16228// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
16229// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
16230// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
16231// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16232// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
16233// CHECK:   ret void
// Exercises the 3-register u32 store; the directives above expect the
// <4 x i32> values to be bitcast via <16 x i8> and stored with
// @llvm.aarch64.neon.st1x3.v4i32.
void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
  vst1q_u32_x3(a, b);
}
16237
16238// CHECK-LABEL: define void @test_vst1q_u64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
16239// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
16240// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
16241// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
16242// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
16243// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
16244// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
16245// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16246// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16247// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
16248// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
16249// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
16250// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
16251// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
16252// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
16253// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
16254// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
16255// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
16256// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
16257// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
16258// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
16259// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
16260// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
16261// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
16262// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16263// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
16264// CHECK:   ret void
// Exercises the 3-register u64 store; the directives above expect
// @llvm.aarch64.neon.st1x3.v2i64 after the <16 x i8> bitcast round-trip.
void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
  vst1q_u64_x3(a, b);
}
16268
16269// CHECK-LABEL: define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
16270// CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
16271// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
16272// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
16273// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
16274// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
16275// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
16276// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16277// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
16278// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
16279// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16280// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
16281// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
16282// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16283// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
16284// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
16285// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16286// CHECK:   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
16287// CHECK:   ret void
// Signed s8 variant: identical lowering to the u8 case above, since the
// intrinsic @llvm.aarch64.neon.st1x3.v16i8 is signedness-agnostic.
void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
  vst1q_s8_x3(a, b);
}
16291
16292// CHECK-LABEL: define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
16293// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
16294// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
16295// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
16296// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
16297// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
16298// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
16299// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16300// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16301// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
16302// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
16303// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16304// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
16305// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
16306// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
16307// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16308// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
16309// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
16310// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
16311// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
16312// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
16313// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
16314// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
16315// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
16316// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16317// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
16318// CHECK:   ret void
// Signed s16 variant: same @llvm.aarch64.neon.st1x3.v8i16 lowering as the
// u16 case, only the struct type in the directives differs.
void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
  vst1q_s16_x3(a, b);
}
16322
16323// CHECK-LABEL: define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
16324// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
16325// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
16326// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
16327// CHECK:   store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
16328// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
16329// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
16330// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16331// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
16332// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
16333// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
16334// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
16335// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
16336// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
16337// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
16338// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
16339// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
16340// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
16341// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
16342// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
16343// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
16344// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
16345// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
16346// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
16347// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16348// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
16349// CHECK:   ret void
// Signed s32 variant: expects @llvm.aarch64.neon.st1x3.v4i32, matching the
// unsigned u32 lowering above.
void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
  vst1q_s32_x3(a, b);
}
16353
16354// CHECK-LABEL: define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
16355// CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
16356// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
16357// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
16358// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
16359// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
16360// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
16361// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16362// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16363// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
16364// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
16365// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
16366// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
16367// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
16368// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
16369// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
16370// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
16371// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
16372// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
16373// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
16374// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
16375// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
16376// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
16377// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
16378// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16379// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
16380// CHECK:   ret void
// Signed s64 variant: expects @llvm.aarch64.neon.st1x3.v2i64, matching the
// unsigned u64 lowering above.
void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
  vst1q_s64_x3(a, b);
}
16384
16385// CHECK-LABEL: define void @test_vst1q_f16_x3(half* %a, [3 x <8 x half>] %b.coerce) #0 {
16386// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
16387// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
16388// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
16389// CHECK:   store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
16390// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
16391// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
16392// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16393// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
16394// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
16395// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
16396// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
16397// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
16398// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
16399// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
16400// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
16401// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
16402// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
16403// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
16404// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
16405// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
16406// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
16407// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
16408// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
16409// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16410// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
16411// CHECK:   ret void
// Half-precision variant: the directives above show the <8 x half> values are
// reinterpreted as <8 x i16> and stored with @llvm.aarch64.neon.st1x3.v8i16
// through an i16* pointer (f16 is stored as its integer bit pattern here).
void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
  vst1q_f16_x3(a, b);
}
16415
16416// CHECK-LABEL: define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b.coerce) #0 {
16417// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
16418// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
16419// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
16420// CHECK:   store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
16421// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
16422// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
16423// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16424// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
16425// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
16426// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
16427// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
16428// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
16429// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
16430// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
16431// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
16432// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
16433// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
16434// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
16435// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
16436// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
16437// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
16438// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
16439// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
16440// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
16441// CHECK:   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]])
16442// CHECK:   ret void
// Single-precision variant: expects @llvm.aarch64.neon.st1x3.v4f32 on
// <4 x float> operands after the <16 x i8> bitcast round-trip.
void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
  vst1q_f32_x3(a, b);
}
16446
16447// CHECK-LABEL: define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b.coerce) #0 {
16448// CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
16449// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
16450// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
16451// CHECK:   store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
16452// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
16453// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
16454// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16455// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
16456// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
16457// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
16458// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
16459// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
16460// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
16461// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
16462// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
16463// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
16464// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
16465// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
16466// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
16467// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
16468// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
16469// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
16470// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
16471// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
16472// CHECK:   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
16473// CHECK:   ret void
// Double-precision variant: expects @llvm.aarch64.neon.st1x3.v2f64 on
// <2 x double> operands.
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}
16477
16478// CHECK-LABEL: define void @test_vst1q_p8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
16479// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
16480// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
16481// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
16482// CHECK:   store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
16483// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
16484// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
16485// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16486// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
16487// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
16488// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16489// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
16490// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
16491// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16492// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
16493// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
16494// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16495// CHECK:   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
16496// CHECK:   ret void
// Polynomial p8 variant: shares the @llvm.aarch64.neon.st1x3.v16i8 lowering
// with the u8/s8 cases, modulo the poly8x16x3_t struct type in the directives.
void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) {
  vst1q_p8_x3(a, b);
}
16500
16501// CHECK-LABEL: define void @test_vst1q_p16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
16502// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
16503// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
16504// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
16505// CHECK:   store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
16506// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
16507// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
16508// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16509// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16510// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
16511// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
16512// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
16513// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
16514// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
16515// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
16516// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
16517// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
16518// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
16519// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
16520// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
16521// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
16522// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
16523// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
16524// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
16525// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16526// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
16527// CHECK:   ret void
// Polynomial p16 variant: shares the @llvm.aarch64.neon.st1x3.v8i16 lowering
// with the u16/s16 cases.
void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) {
  vst1q_p16_x3(a, b);
}
16531
16532// CHECK-LABEL: define void @test_vst1q_p64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
16533// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
16534// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
16535// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
16536// CHECK:   store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
16537// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
16538// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
16539// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
16540// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16541// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
16542// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
16543// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
16544// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
16545// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
16546// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
16547// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
16548// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
16549// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
16550// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
16551// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
16552// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
16553// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
16554// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
16555// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
16556// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16557// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
16558// CHECK:   ret void
// Polynomial p64 variant: shares the @llvm.aarch64.neon.st1x3.v2i64 lowering
// with the u64/s64 cases.
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}
16562
16563// CHECK-LABEL: define void @test_vst1_u8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16564// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
16565// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
16566// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
16567// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16568// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
16569// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
16570// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16571// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
16572// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16573// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16574// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
16575// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16576// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16577// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
16578// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16579// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16580// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16581// CHECK:   ret void
// First 64-bit (non-q) case: three <8 x i8> vectors, 8-byte alignment and a
// 24-byte memcpy in the directives above, lowered to
// @llvm.aarch64.neon.st1x3.v8i8 on the raw i8* pointer.
void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) {
  vst1_u8_x3(a, b);
}
16585
16586// CHECK-LABEL: define void @test_vst1_u16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16587// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
16588// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
16589// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
16590// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16591// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
16592// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
16593// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16594// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16595// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
16596// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16597// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16598// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16599// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
16600// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16601// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16602// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16603// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
16604// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16605// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16606// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16607// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16608// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16609// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16610// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16611// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16612// CHECK:   ret void
// Codegen test: per the CHECK lines above, vst1_u16_x3 must lower to
// @llvm.aarch64.neon.st1x3.v4i16.p0i16. Each <4 x i16> lane round-trips
// through an <8 x i8> bitcast and the i16* pointer through i8*, matching
// clang's generic NEON builtin emission pattern.
void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) {
  vst1_u16_x3(a, b);
}
16616
16617// CHECK-LABEL: define void @test_vst1_u32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
16618// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
16619// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
16620// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
16621// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
16622// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
16623// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
16624// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16625// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
16626// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
16627// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
16628// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16629// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
16630// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
16631// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
16632// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16633// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
16634// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
16635// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
16636// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
16637// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
16638// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
16639// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
16640// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
16641// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16642// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
16643// CHECK:   ret void
// Codegen test: the CHECK lines above require vst1_u32_x3 to lower to
// @llvm.aarch64.neon.st1x3.v2i32.p0i32, with each <2 x i32> lane bitcast
// through <8 x i8> and the i32* pointer through i8* before the call.
void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) {
  vst1_u32_x3(a, b);
}
16647
16648// CHECK-LABEL: define void @test_vst1_u64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16649// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
16650// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
16651// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
16652// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16653// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
16654// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
16655// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16656// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16657// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
16658// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16659// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16660// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16661// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
16662// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16663// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16664// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16665// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
16666// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16667// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16668// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16669// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16670// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16671// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16672// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16673// CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16674// CHECK:   ret void
// Codegen test: the CHECK lines above pin the lowering of vst1_u64_x3 to
// @llvm.aarch64.neon.st1x3.v1i64.p0i64 (three <1 x i64> lanes, each
// round-tripped through an <8 x i8> bitcast).
void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) {
  vst1_u64_x3(a, b);
}
16678
16679// CHECK-LABEL: define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16680// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
16681// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
16682// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
16683// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16684// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
16685// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
16686// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16687// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
16688// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16689// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16690// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
16691// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16692// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16693// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
16694// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16695// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16696// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16697// CHECK:   ret void
// Codegen test: signed variant of the u8 case above — the CHECK lines pin
// the same @llvm.aarch64.neon.st1x3.v8i8.p0i8 lowering (signedness does not
// change the IR for NEON stores; both map to the same %struct layout shape).
void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) {
  vst1_s8_x3(a, b);
}
16701
16702// CHECK-LABEL: define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16703// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
16704// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
16705// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
16706// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16707// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
16708// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
16709// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16710// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16711// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
16712// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16713// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16714// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16715// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
16716// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16717// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16718// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16719// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
16720// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16721// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16722// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16723// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16724// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16725// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16726// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16727// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16728// CHECK:   ret void
// Codegen test: vst1_s16_x3 must lower to @llvm.aarch64.neon.st1x3.v4i16.p0i16
// per the CHECK lines above — identical IR shape to the u16 variant, differing
// only in the %struct.int16x4x3_t type name.
void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) {
  vst1_s16_x3(a, b);
}
16732
16733// CHECK-LABEL: define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
16734// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
16735// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
16736// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
16737// CHECK:   store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
16738// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
16739// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
16740// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16741// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
16742// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16743// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
16744// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16745// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
16746// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16747// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
16748// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16749// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
16750// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
16751// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
16752// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
16753// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
16754// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
16755// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
16756// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
16757// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
16758// CHECK:   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
16759// CHECK:   ret void
// Codegen test: vst1_s32_x3 must lower to @llvm.aarch64.neon.st1x3.v2i32.p0i32
// per the CHECK lines above — same IR shape as the u32 variant, with the
// %struct.int32x2x3_t type name.
void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
  vst1_s32_x3(a, b);
}
16763
16764// CHECK-LABEL: define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16765// CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
16766// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
16767// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
16768// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16769// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
16770// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
16771// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16772// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16773// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16774// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16775// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16776// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16777// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16778// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16779// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16780// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16781// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
16782// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16783// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16784// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16785// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16786// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16787// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16788// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16789// CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16790// CHECK:   ret void
// Codegen test: vst1_s64_x3 must lower to @llvm.aarch64.neon.st1x3.v1i64.p0i64
// per the CHECK lines above — same IR shape as the u64 variant, with the
// %struct.int64x1x3_t type name.
void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
  vst1_s64_x3(a, b);
}
16794
16795// CHECK-LABEL: define void @test_vst1_f16_x3(half* %a, [3 x <4 x half>] %b.coerce) #0 {
16796// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
16797// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
16798// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
16799// CHECK:   store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
16800// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
16801// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
16802// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16803// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
16804// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16805// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
16806// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
16807// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
16808// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16809// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
16810// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
16811// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
16812// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
16813// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
16814// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
16815// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
16816// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16817// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16818// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16819// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16820// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16821// CHECK:   ret void
// Codegen test: per the CHECK lines above, vst1_f16_x3 is lowered via the
// *integer* intrinsic @llvm.aarch64.neon.st1x3.v4i16.p0i16 — the <4 x half>
// lanes are bitcast to <4 x i16> (half has no dedicated st1x3 lowering here),
// which is why this test needs -fallow-half-arguments-and-returns.
void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
  vst1_f16_x3(a, b);
}
16825
16826// CHECK-LABEL: define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b.coerce) #0 {
16827// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
16828// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
16829// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
16830// CHECK:   store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
16831// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
16832// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
16833// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16834// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
16835// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16836// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
16837// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
16838// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
16839// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16840// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
16841// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
16842// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
16843// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
16844// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
16845// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
16846// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
16847// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
16848// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
16849// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
16850// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
16851// CHECK:   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]])
16852// CHECK:   ret void
// Codegen test: the CHECK lines above pin vst1_f32_x3 to
// @llvm.aarch64.neon.st1x3.v2f32.p0f32 — unlike f16, float keeps its own
// vector element type in the intrinsic (round-trip bitcasts via <8 x i8>
// cancel back to <2 x float>).
void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) {
  vst1_f32_x3(a, b);
}
16856
16857// CHECK-LABEL: define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b.coerce) #0 {
16858// CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
16859// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
16860// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
16861// CHECK:   store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
16862// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
16863// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
16864// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16865// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
16866// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16867// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
16868// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
16869// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
16870// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16871// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
16872// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
16873// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
16874// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
16875// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
16876// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
16877// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
16878// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
16879// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
16880// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
16881// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
16882// CHECK:   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
16883// CHECK:   ret void
// Codegen test: the CHECK lines above pin vst1_f64_x3 (AArch64-only type) to
// @llvm.aarch64.neon.st1x3.v1f64.p0f64 with three <1 x double> lanes.
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}
16887
16888// CHECK-LABEL: define void @test_vst1_p8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
16889// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
16890// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
16891// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
16892// CHECK:   store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
16893// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
16894// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
16895// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16896// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16897// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
16898// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16899// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16900// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
16901// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16902// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
16903// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
16904// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
16905// CHECK:   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
16906// CHECK:   ret void
// Codegen test: polynomial variant — the CHECK lines above require the same
// @llvm.aarch64.neon.st1x3.v8i8.p0i8 lowering as the u8/s8 cases (poly8 is
// i8 at the IR level), with the %struct.poly8x8x3_t type name.
void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
  vst1_p8_x3(a, b);
}
16910
16911// CHECK-LABEL: define void @test_vst1_p16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
16912// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
16913// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
16914// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
16915// CHECK:   store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
16916// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
16917// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
16918// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16919// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
16920// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16921// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
16922// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16923// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
16924// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16925// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
16926// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16927// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
16928// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
16929// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
16930// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
16931// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
16932// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
16933// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
16934// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
16935// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
16936// CHECK:   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
16937// CHECK:   ret void
// Codegen test: vst1_p16_x3 must lower to @llvm.aarch64.neon.st1x3.v4i16.p0i16
// per the CHECK lines above — identical IR shape to the u16/s16 variants
// (poly16 is i16 at the IR level).
void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
  vst1_p16_x3(a, b);
}
16941
16942// CHECK-LABEL: define void @test_vst1_p64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
16943// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
16944// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
16945// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
16946// CHECK:   store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
16947// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
16948// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
16949// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
16950// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
16951// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16952// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
16953// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16954// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
16955// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16956// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
16957// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16958// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
16959// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
16960// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
16961// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
16962// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
16963// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
16964// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
16965// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
16966// CHECK:   [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
16967// CHECK:   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
16968// CHECK:   ret void
// Codegen test: vst1_p64_x3 must lower to @llvm.aarch64.neon.st1x3.v1i64.p0i64
// per the CHECK lines above — identical IR shape to the u64/s64 variants
// (poly64 is i64 at the IR level).
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}
16972
// Checks codegen for vst1q_u8_x4: the uint8x16x4_t aggregate is stored to a
// local, copied into __s1 via llvm.memcpy, its four <16 x i8> elements loaded
// and passed to llvm.aarch64.neon.st1x4.v16i8 with the i8* pointer directly
// (no pointer bitcast needed for the byte element type).
// CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x4.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
  vst1q_u8_x4(a, b);
}
16998
// Checks codegen for vst1q_u16_x4: same memcpy-through-__s1 pattern as the u8
// variant, but each <8 x i16> lane round-trips through a <16 x i8> bitcast and
// the i16* destination is bitcast to i8* and back before the
// llvm.aarch64.neon.st1x4.v8i16 call.
// CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
  vst1q_u16_x4(a, b);
}
17034
// Checks codegen for vst1q_u32_x4: four <4 x i32> lanes are copied via the
// __s1 temporary, round-tripped through <16 x i8> bitcasts, and stored with
// llvm.aarch64.neon.st1x4.v4i32 through the pointer bitcast chain.
// CHECK-LABEL: define void @test_vst1q_u32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
  vst1q_u32_x4(a, b);
}
17070
// Checks codegen for vst1q_u64_x4: four <2 x i64> lanes go through the same
// __s1 memcpy and <16 x i8> bitcast round-trip, ending in
// llvm.aarch64.neon.st1x4.v2i64.
// CHECK-LABEL: define void @test_vst1q_u64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
  vst1q_u64_x4(a, b);
}
17106
// Checks codegen for vst1q_s8_x4: identical IR shape to the u8 variant (signed
// and unsigned byte vectors lower to the same <16 x i8> st1x4 intrinsic call).
// CHECK-LABEL: define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
  vst1q_s8_x4(a, b);
}
17132
// Checks codegen for vst1q_s16_x4: identical IR shape to the u16 variant —
// <8 x i16> lanes bitcast through <16 x i8> and stored with st1x4.v8i16.
// CHECK-LABEL: define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
  vst1q_s16_x4(a, b);
}
17168
// Checks codegen for vst1q_s32_x4: identical IR shape to the u32 variant —
// <4 x i32> lanes bitcast through <16 x i8> and stored with st1x4.v4i32.
// CHECK-LABEL: define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
  vst1q_s32_x4(a, b);
}
17204
// Checks codegen for vst1q_s64_x4: identical IR shape to the u64 variant —
// <2 x i64> lanes bitcast through <16 x i8> and stored with st1x4.v2i64.
// CHECK-LABEL: define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
  vst1q_s64_x4(a, b);
}
17240
// Checks codegen for vst1q_f16_x4: note the <8 x half> lanes are reinterpreted
// as <8 x i16> (via <16 x i8>) and the store uses the integer intrinsic
// llvm.aarch64.neon.st1x4.v8i16 — half vectors have no dedicated overload here.
// CHECK-LABEL: define void @test_vst1q_f16_x4(half* %a, [4 x <8 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
  vst1q_f16_x4(a, b);
}
17276
// Checks codegen for vst1q_f32_x4: <4 x float> lanes round-trip through
// <16 x i8> bitcasts and are stored with the float overload
// llvm.aarch64.neon.st1x4.v4f32 through a float* pointer.
// CHECK-LABEL: define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]])
// CHECK:   ret void
void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) {
  vst1q_f32_x4(a, b);
}
17312
// Checks codegen for vst1q_f64_x4: <2 x double> lanes round-trip through
// <16 x i8> bitcasts and are stored with the double overload
// llvm.aarch64.neon.st1x4.v2f64 through a double* pointer.
// CHECK-LABEL: define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
// CHECK:   ret void
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}
17348
// Checks codegen for vst1q_p8_x4: polynomial bytes lower exactly like the
// u8/s8 variants — four <16 x i8> lanes passed to st1x4.v16i8 with the raw
// i8* pointer.
// CHECK-LABEL: define void @test_vst1q_p8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) {
  vst1q_p8_x4(a, b);
}
17374
// Auto-generated FileCheck test for vst1q_p16_x4. Unlike the i8 case, each
// <8 x i16> lane is round-tripped through <16 x i8> bitcasts before the
// llvm.aarch64.neon.st1x4 call. CHECK lines are test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1q_p16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) {
  vst1q_p16_x4(a, b);
}
17410
// Auto-generated FileCheck test for vst1q_p64_x4: <2 x i64> lanes bitcast
// through <16 x i8> and back before the llvm.aarch64.neon.st1x4 call.
// CHECK lines are test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1q_p64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}
17446
// Auto-generated FileCheck test for the 64-bit (d-register) vst1_u8_x4:
// 32-byte memcpy, <8 x i8> lanes passed straight to st1x4 (no bitcasts
// needed for i8 elements). CHECK lines are test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_u8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) {
  vst1_u8_x4(a, b);
}
17472
// Auto-generated FileCheck test for vst1_u16_x4: <4 x i16> lanes round-trip
// through <8 x i8> bitcasts before the st1x4 call. CHECK lines are test
// assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_u16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) {
  vst1_u16_x4(a, b);
}
17508
// Auto-generated FileCheck test for vst1_u32_x4: <2 x i32> lanes round-trip
// through <8 x i8> bitcasts before the st1x4 call. CHECK lines are test
// assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_u32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) {
  vst1_u32_x4(a, b);
}
17544
// Auto-generated FileCheck test for vst1_u64_x4: <1 x i64> lanes round-trip
// through <8 x i8> bitcasts before the st1x4 call. CHECK lines are test
// assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_u64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) {
  vst1_u64_x4(a, b);
}
17580
// Auto-generated FileCheck test for vst1_s8_x4; IR expectations mirror the
// u8 variant (signed/unsigned share the same st1x4.v8i8 lowering). CHECK
// lines are test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK:   ret void
void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) {
  vst1_s8_x4(a, b);
}
17606
// Auto-generated FileCheck test for vst1_s16_x4; same lowering as the u16
// variant (st1x4.v4i16 with <8 x i8> bitcast round-trips). CHECK lines are
// test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) {
  vst1_s16_x4(a, b);
}
17642
// Auto-generated FileCheck test for vst1_s32_x4; same lowering as the u32
// variant (st1x4.v2i32 with <8 x i8> bitcast round-trips). CHECK lines are
// test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK:   ret void
void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
  vst1_s32_x4(a, b);
}
17678
// Auto-generated FileCheck test for vst1_s64_x4; same lowering as the u64
// variant (st1x4.v1i64 with <8 x i8> bitcast round-trips). CHECK lines are
// test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK:   ret void
void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
  vst1_s64_x4(a, b);
}
17714
// Auto-generated FileCheck test for vst1_f16_x4. Note: the half lanes are
// reinterpreted as <4 x i16> and stored via st1x4.v4i16 (f16 had no native
// st1x4 lowering here). CHECK lines are test assertions — do not hand-edit.
// CHECK-LABEL: define void @test_vst1_f16_x4(half* %a, [4 x <4 x half>] %b.coerce) #0 {
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK:   [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK:   [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK:   ret void
void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) {
  vst1_f16_x4(a, b);
}
17750
17751// CHECK-LABEL: define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b.coerce) #0 {
17752// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
17753// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
17754// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
17755// CHECK:   store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
17756// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
17757// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
17758// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17759// CHECK:   [[TMP2:%.*]] = bitcast float* %a to i8*
17760// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17761// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
17762// CHECK:   [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
17763// CHECK:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
17764// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17765// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
17766// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
17767// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
17768// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17769// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
17770// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
17771// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
17772// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
17773// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
17774// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
17775// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
17776// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
17777// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
17778// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
17779// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
17780// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
17781// CHECK:   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]])
17782// CHECK:   ret void
// vst1_f32_x4: lowers to llvm.aarch64.neon.st1x4.v2f32 (see CHECK lines above).
void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) {
  vst1_f32_x4(a, b);
}
17786
17787// CHECK-LABEL: define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b.coerce) #0 {
17788// CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
17789// CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
17790// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
17791// CHECK:   store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
17792// CHECK:   [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
17793// CHECK:   [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8*
17794// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17795// CHECK:   [[TMP2:%.*]] = bitcast double* %a to i8*
17796// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17797// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0
17798// CHECK:   [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
17799// CHECK:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
17800// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17801// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
17802// CHECK:   [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
17803// CHECK:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
17804// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17805// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
17806// CHECK:   [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
17807// CHECK:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
17808// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
17809// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
17810// CHECK:   [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
17811// CHECK:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
17812// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
17813// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
17814// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
17815// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
17816// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
17817// CHECK:   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]])
17818// CHECK:   ret void
// vst1_f64_x4: lowers to llvm.aarch64.neon.st1x4.v1f64 (see CHECK lines above).
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}
17822
17823// CHECK-LABEL: define void @test_vst1_p8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
17824// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
17825// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
17826// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
17827// CHECK:   store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
17828// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
17829// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
17830// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17831// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17832// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
17833// CHECK:   [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
17834// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17835// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
17836// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
17837// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17838// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
17839// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
17840// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
17841// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
17842// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
17843// CHECK:   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
17844// CHECK:   ret void
// vst1_p8_x4: lowers to llvm.aarch64.neon.st1x4.v8i8; the i8 elements need no
// bitcasts and %a is used directly (see CHECK lines above).
void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) {
  vst1_p8_x4(a, b);
}
17848
17849// CHECK-LABEL: define void @test_vst1_p16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
17850// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
17851// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
17852// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
17853// CHECK:   store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
17854// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
17855// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
17856// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17857// CHECK:   [[TMP2:%.*]] = bitcast i16* %a to i8*
17858// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17859// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
17860// CHECK:   [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
17861// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
17862// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17863// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
17864// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
17865// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
17866// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17867// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
17868// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
17869// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
17870// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
17871// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
17872// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
17873// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
17874// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
17875// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
17876// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
17877// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
17878// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
17879// CHECK:   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
17880// CHECK:   ret void
// vst1_p16_x4: lowers to llvm.aarch64.neon.st1x4.v4i16 (see CHECK lines above).
void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) {
  vst1_p16_x4(a, b);
}
17884
17885// CHECK-LABEL: define void @test_vst1_p64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
17886// CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
17887// CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
17888// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
17889// CHECK:   store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
17890// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
17891// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8*
17892// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
17893// CHECK:   [[TMP2:%.*]] = bitcast i64* %a to i8*
17894// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17895// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
17896// CHECK:   [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
17897// CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
17898// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17899// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
17900// CHECK:   [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
17901// CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
17902// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17903// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
17904// CHECK:   [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
17905// CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
17906// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
17907// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
17908// CHECK:   [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
17909// CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
17910// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
17911// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
17912// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
17913// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
17914// CHECK:   [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
17915// CHECK:   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
17916// CHECK:   ret void
// vst1_p64_x4: lowers to llvm.aarch64.neon.st1x4.v1i64 (see CHECK lines above).
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
17920
17921// CHECK-LABEL: define i64 @test_vceqd_s64(i64 %a, i64 %b) #0 {
17922// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
17923// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17924// CHECK:   ret i64 [[VCEQD_I]]
// vceqd_s64: scalar compare-equal, lowered to icmp eq + sext i1 to an i64 mask.
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}
17928
17929// CHECK-LABEL: define i64 @test_vceqd_u64(i64 %a, i64 %b) #0 {
17930// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, %b
17931// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17932// CHECK:   ret i64 [[VCEQD_I]]
// vceqd_u64: unsigned scalar compare-equal (same icmp eq + sext IR as the
// signed form). Cast changed from (int64_t) to (uint64_t) to match the
// function's return type, consistent with test_vcged_u64 below; the emitted
// i64 IR matched by the CHECK lines above is unchanged.
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vceqd_u64(a, b);
}
17936
17937// CHECK-LABEL: define i64 @test_vceqzd_s64(i64 %a) #0 {
17938// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
17939// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
17940// CHECK:   ret i64 [[VCEQZ_I]]
// vceqzd_s64: compare-equal-to-zero, lowered to icmp eq %a, 0 + sext to i64.
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}
17944
17945// CHECK-LABEL: define i64 @test_vceqzd_u64(i64 %a) #0 {
17946// CHECK:   [[TMP0:%.*]] = icmp eq i64 %a, 0
17947// CHECK:   [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
17948// CHECK:   ret i64 [[VCEQZD_I]]
// vceqzd_u64: unsigned compare-equal-to-zero. Signature fixed to use unsigned
// types, matching the _u64 intrinsic and consistent with test_vceqd_u64 above;
// the emitted IR (i64 in, i64 out) matched by the CHECK lines is unchanged.
uint64_t test_vceqzd_u64(uint64_t a) {
  return (uint64_t)vceqzd_u64(a);
}
17952
17953// CHECK-LABEL: define i64 @test_vcged_s64(i64 %a, i64 %b) #0 {
17954// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, %b
17955// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17956// CHECK:   ret i64 [[VCEQD_I]]
// vcged_s64: signed compare-greater-or-equal, lowered to icmp sge + sext.
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}
17960
17961// CHECK-LABEL: define i64 @test_vcged_u64(i64 %a, i64 %b) #0 {
17962// CHECK:   [[TMP0:%.*]] = icmp uge i64 %a, %b
17963// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17964// CHECK:   ret i64 [[VCEQD_I]]
// vcged_u64: unsigned compare-greater-or-equal, lowered to icmp uge + sext.
// Body re-indented from 4 spaces to the file's 2-space convention; no code
// change, so the CHECK lines above still match.
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}
17968
17969// CHECK-LABEL: define i64 @test_vcgezd_s64(i64 %a) #0 {
17970// CHECK:   [[TMP0:%.*]] = icmp sge i64 %a, 0
17971// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
17972// CHECK:   ret i64 [[VCGEZ_I]]
// vcgezd_s64: signed compare-greater-or-equal-to-zero (icmp sge %a, 0 + sext).
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}
17976
17977// CHECK-LABEL: define i64 @test_vcgtd_s64(i64 %a, i64 %b) #0 {
17978// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, %b
17979// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17980// CHECK:   ret i64 [[VCEQD_I]]
// vcgtd_s64: signed compare-greater-than, lowered to icmp sgt + sext.
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}
17984
17985// CHECK-LABEL: define i64 @test_vcgtd_u64(i64 %a, i64 %b) #0 {
17986// CHECK:   [[TMP0:%.*]] = icmp ugt i64 %a, %b
17987// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
17988// CHECK:   ret i64 [[VCEQD_I]]
// vcgtd_u64: unsigned compare-greater-than, lowered to icmp ugt + sext.
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}
17992
17993// CHECK-LABEL: define i64 @test_vcgtzd_s64(i64 %a) #0 {
17994// CHECK:   [[TMP0:%.*]] = icmp sgt i64 %a, 0
17995// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
17996// CHECK:   ret i64 [[VCGTZ_I]]
// vcgtzd_s64: signed compare-greater-than-zero (icmp sgt %a, 0 + sext).
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}
18000
18001// CHECK-LABEL: define i64 @test_vcled_s64(i64 %a, i64 %b) #0 {
18002// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, %b
18003// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
18004// CHECK:   ret i64 [[VCEQD_I]]
// vcled_s64: signed compare-less-or-equal, lowered to icmp sle + sext.
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}
18008
18009// CHECK-LABEL: define i64 @test_vcled_u64(i64 %a, i64 %b) #0 {
18010// CHECK:   [[TMP0:%.*]] = icmp ule i64 %a, %b
18011// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
18012// CHECK:   ret i64 [[VCEQD_I]]
// vcled_u64: unsigned compare-less-or-equal, lowered to icmp ule + sext.
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}
18016
18017// CHECK-LABEL: define i64 @test_vclezd_s64(i64 %a) #0 {
18018// CHECK:   [[TMP0:%.*]] = icmp sle i64 %a, 0
18019// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
18020// CHECK:   ret i64 [[VCLEZ_I]]
// vclezd_s64: signed compare-less-or-equal-to-zero (icmp sle %a, 0 + sext).
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}
18024
18025// CHECK-LABEL: define i64 @test_vcltd_s64(i64 %a, i64 %b) #0 {
18026// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, %b
18027// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
18028// CHECK:   ret i64 [[VCEQD_I]]
// vcltd_s64: signed compare-less-than, lowered to icmp slt + sext.
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}
18032
18033// CHECK-LABEL: define i64 @test_vcltd_u64(i64 %a, i64 %b) #0 {
18034// CHECK:   [[TMP0:%.*]] = icmp ult i64 %a, %b
18035// CHECK:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
18036// CHECK:   ret i64 [[VCEQD_I]]
// vcltd_u64: unsigned compare-less-than, lowered to icmp ult + sext.
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}
18040
18041// CHECK-LABEL: define i64 @test_vcltzd_s64(i64 %a) #0 {
18042// CHECK:   [[TMP0:%.*]] = icmp slt i64 %a, 0
18043// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
18044// CHECK:   ret i64 [[VCLTZ_I]]
// vcltzd_s64: signed compare-less-than-zero (icmp slt %a, 0 + sext).
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}
18048
18049// CHECK-LABEL: define i64 @test_vtstd_s64(i64 %a, i64 %b) #0 {
18050// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
18051// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
18052// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
18053// CHECK:   ret i64 [[VTSTD_I]]
// vtstd_s64: scalar bit test, lowered to and + icmp ne 0 + sext (see CHECK).
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}
18057
18058// CHECK-LABEL: define i64 @test_vtstd_u64(i64 %a, i64 %b) #0 {
18059// CHECK:   [[TMP0:%.*]] = and i64 %a, %b
18060// CHECK:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
18061// CHECK:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
18062// CHECK:   ret i64 [[VTSTD_I]]
// vtstd_u64: same and + icmp ne 0 + sext lowering as the signed form.
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
18066
18067// CHECK-LABEL: define i64 @test_vabsd_s64(i64 %a) #0 {
18068// CHECK:   [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4
18069// CHECK:   ret i64 [[VABSD_S64_I]]
// vabsd_s64: lowers directly to the scalar llvm.aarch64.neon.abs.i64 intrinsic.
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}
18073
18074// CHECK-LABEL: define i8 @test_vqabsb_s8(i8 %a) #0 {
18075// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
18076// CHECK:   [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4
18077// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
18078// CHECK:   ret i8 [[TMP1]]
// vqabsb_s8: scalar widened to lane 0 of a <8 x i8> sqabs, then extracted back.
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}
18082
18083// CHECK-LABEL: define i16 @test_vqabsh_s16(i16 %a) #0 {
18084// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
18085// CHECK:   [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4
18086// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
18087// CHECK:   ret i16 [[TMP1]]
// vqabsh_s16: scalar widened to lane 0 of a <4 x i16> sqabs, then extracted back.
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}
18091
18092// CHECK-LABEL: define i32 @test_vqabss_s32(i32 %a) #0 {
18093// CHECK:   [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4
18094// CHECK:   ret i32 [[VQABSS_S32_I]]
// vqabss_s32: lowers directly to the scalar llvm.aarch64.neon.sqabs.i32 intrinsic.
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}
18098
18099// CHECK-LABEL: define i64 @test_vqabsd_s64(i64 %a) #0 {
18100// CHECK:   [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4
18101// CHECK:   ret i64 [[VQABSD_S64_I]]
// vqabsd_s64: lowers directly to the scalar llvm.aarch64.neon.sqabs.i64 intrinsic.
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}
18105
18106// CHECK-LABEL: define i64 @test_vnegd_s64(i64 %a) #0 {
18107// CHECK:   [[VNEGD_I:%.*]] = sub i64 0, %a
18108// CHECK:   ret i64 [[VNEGD_I]]
// vnegd_s64: plain negation, lowered to sub i64 0, %a (no intrinsic call).
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}
18112
18113// CHECK-LABEL: define i8 @test_vqnegb_s8(i8 %a) #0 {
18114// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
18115// CHECK:   [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4
18116// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
18117// CHECK:   ret i8 [[TMP1]]
// vqnegb_s8: scalar widened to lane 0 of a <8 x i8> sqneg, then extracted back.
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}
18121
18122// CHECK-LABEL: define i16 @test_vqnegh_s16(i16 %a) #0 {
18123// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
18124// CHECK:   [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4
18125// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
18126// CHECK:   ret i16 [[TMP1]]
// vqnegh_s16: scalar widened to lane 0 of a <4 x i16> sqneg, then extracted back.
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}
18130
18131// CHECK-LABEL: define i32 @test_vqnegs_s32(i32 %a) #0 {
18132// CHECK:   [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4
18133// CHECK:   ret i32 [[VQNEGS_S32_I]]
// vqnegs_s32: lowers directly to the scalar llvm.aarch64.neon.sqneg.i32 intrinsic.
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}
18137
18138// CHECK-LABEL: define i64 @test_vqnegd_s64(i64 %a) #0 {
18139// CHECK:   [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4
18140// CHECK:   ret i64 [[VQNEGD_S64_I]]
// vqnegd_s64: lowers directly to the scalar llvm.aarch64.neon.sqneg.i64 intrinsic.
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}
18144
18145// CHECK-LABEL: define i8 @test_vuqaddb_s8(i8 %a, i8 %b) #0 {
18146// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
18147// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
18148// CHECK:   [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
18149// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
18150// CHECK:   ret i8 [[TMP2]]
// vuqaddb_s8: both scalars widened to lane 0 of <8 x i8> suqadd, result extracted.
int8_t test_vuqaddb_s8(int8_t a, int8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}
18154
18155// CHECK-LABEL: define i16 @test_vuqaddh_s16(i16 %a, i16 %b) #0 {
18156// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
18157// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
18158// CHECK:   [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
18159// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
18160// CHECK:   ret i16 [[TMP2]]
// vuqaddh_s16: both scalars widened to lane 0 of <4 x i16> suqadd, result extracted.
int16_t test_vuqaddh_s16(int16_t a, int16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}
18164
18165// CHECK-LABEL: define i32 @test_vuqadds_s32(i32 %a, i32 %b) #0 {
18166// CHECK:   [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4
18167// CHECK:   ret i32 [[VUQADDS_S32_I]]
// vuqadds_s32: lowers directly to the scalar llvm.aarch64.neon.suqadd.i32 intrinsic.
int32_t test_vuqadds_s32(int32_t a, int32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}
18171
18172// CHECK-LABEL: define i64 @test_vuqaddd_s64(i64 %a, i64 %b) #0 {
18173// CHECK:   [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4
18174// CHECK:   ret i64 [[VUQADDD_S64_I]]
// vuqaddd_s64: lowers directly to the scalar llvm.aarch64.neon.suqadd.i64 intrinsic.
int64_t test_vuqaddd_s64(int64_t a, int64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}
18178
18179// CHECK-LABEL: define i8 @test_vsqaddb_u8(i8 %a, i8 %b) #0 {
18180// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
18181// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
18182// CHECK:   [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
18183// CHECK:   [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
18184// CHECK:   ret i8 [[TMP2]]
// vsqaddb_u8: both scalars widened to lane 0 of <8 x i8> usqadd, result extracted.
uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}
18188
18189// CHECK-LABEL: define i16 @test_vsqaddh_u16(i16 %a, i16 %b) #0 {
18190// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
18191// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
18192// CHECK:   [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
18193// CHECK:   [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
18194// CHECK:   ret i16 [[TMP2]]
// vsqaddh_u16: both scalars widened to lane 0 of <4 x i16> usqadd, result extracted.
uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}
18198
18199// CHECK-LABEL: define i32 @test_vsqadds_u32(i32 %a, i32 %b) #0 {
18200// CHECK:   [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4
18201// CHECK:   ret i32 [[VSQADDS_U32_I]]
// vsqadds_u32: lowers directly to the scalar llvm.aarch64.neon.usqadd.i32 intrinsic.
uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}
18205
18206// CHECK-LABEL: define i64 @test_vsqaddd_u64(i64 %a, i64 %b) #0 {
18207// CHECK:   [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4
18208// CHECK:   ret i64 [[VSQADDD_U64_I]]
// vsqaddd_u64: lowers directly to the scalar llvm.aarch64.neon.usqadd.i64 intrinsic.
uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}
18212
18213// CHECK-LABEL: define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) #0 {
18214// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
18215// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
18216// CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
18217// CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
18218// CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4
18219// CHECK:   ret i32 [[VQDMLXL1_I]]
// vqdmlalh_s16: sqdmull on lane 0 of <4 x i16>, lane extracted, then scalar
// sqadd accumulate (see CHECK lines above). Removed the stray blank line that
// was inside the function body; no code change.
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlalh_s16(a, b, c);
}
18224
18225// CHECK-LABEL: define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) #0 {
18226// CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
18227// CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
18228// CHECK:   ret i64 [[VQDMLXL1_I]]
// vqdmlals_s32: scalar sqdmulls followed by scalar sqadd.i64 accumulate.
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}
18232
18233// CHECK-LABEL: define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) #0 {
18234// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
18235// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
18236// CHECK:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
18237// CHECK:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
18238// CHECK:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4
18239// CHECK:   ret i32 [[VQDMLXL1_I]]
// vqdmlslh_s16: sqdmull on lane 0 of <4 x i16>, lane extracted, then scalar
// sqsub accumulate (see CHECK lines above). Removed the stray blank line that
// was inside the function body; no code change.
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlslh_s16(a, b, c);
}
18244
18245// CHECK-LABEL: define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) #0 {
18246// CHECK:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
18247// CHECK:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
18248// CHECK:   ret i64 [[VQDMLXL1_I]]
// vqdmlsls_s32: scalar sqdmulls followed by scalar sqsub.i64 accumulate.
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}
18252
18253// CHECK-LABEL: define i32 @test_vqdmullh_s16(i16 %a, i16 %b) #0 {
18254// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
18255// CHECK:   [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
18256// CHECK:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
18257// CHECK:   [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
18258// CHECK:   ret i32 [[TMP2]]
// vqdmullh_s16: both scalars widened to lane 0 of sqdmull.v4i32, result extracted.
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}
18262
18263// CHECK-LABEL: define i64 @test_vqdmulls_s32(i32 %a, i32 %b) #0 {
18264// CHECK:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4
18265// CHECK:   ret i64 [[VQDMULLS_S32_I]]
// vqdmulls_s32: lowers directly to the scalar llvm.aarch64.neon.sqdmulls.scalar.
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}
18269
18270// CHECK-LABEL: define i8 @test_vqmovunh_s16(i16 %a) #0 {
18271// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
18272// CHECK:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4
18273// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
18274// CHECK:   ret i8 [[TMP1]]
// vqmovunh_s16: scalar widened to lane 0 of sqxtun.v8i8, result extracted.
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}
18278
18279// CHECK-LABEL: define i16 @test_vqmovuns_s32(i32 %a) #0 {
18280// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
18281// CHECK:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4
18282// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
18283// CHECK:   ret i16 [[TMP1]]
// vqmovuns_s32: scalar widened to lane 0 of sqxtun.v4i16, result extracted.
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}
18287
18288// CHECK-LABEL: define i32 @test_vqmovund_s64(i64 %a) #0 {
18289// CHECK:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4
18290// CHECK:   ret i32 [[VQMOVUND_S64_I]]
// vqmovund_s64: lowers to the scalar llvm.aarch64.neon.scalar.sqxtun.i32.i64.
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}
18294
18295// CHECK-LABEL: define i8 @test_vqmovnh_s16(i16 %a) #0 {
18296// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
18297// CHECK:   [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4
18298// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
18299// CHECK:   ret i8 [[TMP1]]
// vqmovnh_s16: scalar widened to lane 0 of sqxtn.v8i8, result extracted.
int8_t test_vqmovnh_s16(int16_t a) {
  return (int8_t)vqmovnh_s16(a);
}
18303
18304// CHECK-LABEL: define i16 @test_vqmovns_s32(i32 %a) #0 {
18305// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
18306// CHECK:   [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4
18307// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
18308// CHECK:   ret i16 [[TMP1]]
// vqmovns_s32: scalar widened to lane 0 of sqxtn.v4i16, result extracted.
int16_t test_vqmovns_s32(int32_t a) {
  return (int16_t)vqmovns_s32(a);
}
18312
18313// CHECK-LABEL: define i32 @test_vqmovnd_s64(i64 %a) #0 {
18314// CHECK:   [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4
18315// CHECK:   ret i32 [[VQMOVND_S64_I]]
18316int32_t test_vqmovnd_s64(int64_t a) {
18317  return (int32_t)vqmovnd_s64(a);
18318}
18319
18320// CHECK-LABEL: define i8 @test_vqmovnh_u16(i16 %a) #0 {
18321// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
18322// CHECK:   [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4
18323// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
18324// CHECK:   ret i8 [[TMP1]]
18325int8_t test_vqmovnh_u16(int16_t a) {
18326  return (int8_t)vqmovnh_u16(a);
18327}
18328
18329// CHECK-LABEL: define i16 @test_vqmovns_u32(i32 %a) #0 {
18330// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
18331// CHECK:   [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4
18332// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
18333// CHECK:   ret i16 [[TMP1]]
18334int16_t test_vqmovns_u32(int32_t a) {
18335  return (int16_t)vqmovns_u32(a);
18336}
18337
18338// CHECK-LABEL: define i32 @test_vqmovnd_u64(i64 %a) #0 {
18339// CHECK:   [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4
18340// CHECK:   ret i32 [[VQMOVND_U64_I]]
18341int32_t test_vqmovnd_u64(int64_t a) {
18342  return (int32_t)vqmovnd_u64(a);
18343}
18344
// Scalar floating-point compare tests (vceq/vcge/vcgt/vcle/vclt and the
// compare-against-zero forms). These intrinsics do not lower to target
// intrinsic calls at all: CodeGen emits a plain IR fcmp (ordered
// predicate) followed by sext of the i1 to the full-width all-ones /
// all-zeros mask that the ACLE result type requires.
// CHECK-LABEL: define i32 @test_vceqs_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vceqd_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// The *z* (compare-with-zero) forms fold the zero operand into the fcmp.
// CHECK-LABEL: define i32 @test_vceqzs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcges_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgezs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgezd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcgts_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgtzs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgtzd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcles_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vclezs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vclezd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vclts_f32(float %a, float %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_f64(double %a, double %b) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcltzs_f32(float %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK:   ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcltzd_f64(double %a) #0 {
// CHECK:   [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK:   ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}
18504
// Scalar absolute-value compare tests. Only facge/facgt exist in hardware;
// the "less-than" forms (vcale/vcalt) are implemented by calling the same
// intrinsic with the operands swapped — the CHECK lines pin %b before %a
// for those to verify the swap happens.
// CHECK-LABEL: define i32 @test_vcages_f32(float %a, float %b) #0 {
// CHECK:   [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4
// CHECK:   ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaged_f64(double %a, double %b) #0 {
// CHECK:   [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4
// CHECK:   ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcagts_f32(float %a, float %b) #0 {
// CHECK:   [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4
// CHECK:   ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcagtd_f64(double %a, double %b) #0 {
// CHECK:   [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4
// CHECK:   ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// vcale(a, b) == facge(b, a): note the reversed operand order below.
// CHECK-LABEL: define i32 @test_vcales_f32(float %a, float %b) #0 {
// CHECK:   [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4
// CHECK:   ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaled_f64(double %a, double %b) #0 {
// CHECK:   [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4
// CHECK:   ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// vcalt(a, b) == facgt(b, a): reversed operand order again.
// CHECK-LABEL: define i32 @test_vcalts_f32(float %a, float %b) #0 {
// CHECK:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4
// CHECK:   ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaltd_f64(double %a, double %b) #0 {
// CHECK:   [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4
// CHECK:   ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}
18560
// Shift-right-by-immediate tests (vshr/vrshr, scalar and 64x1 vector).
// Plain shifts lower to IR ashr/lshr; the rounding variants lower to the
// srshl/urshl intrinsics with a NEGATED shift amount (a right rounding
// shift is encoded as a rounding shift left by -n).
// CHECK-LABEL: define i64 @test_vshrd_n_s64(i64 %a) #0 {
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK:   ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// Unsigned shift right by the full width (64) is defined by ACLE to
// produce 0, so the whole call constant-folds away.
// CHECK-LABEL: define i64 @test_vshrd_n_u64(i64 %a) #0 {
// CHECK:   ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {

  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64_2() #0 {
// CHECK:   ret i64 0
uint64_t test_vshrd_n_u64_2() {

  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// Rounding shift right n == rounding shift left by -n (here -63).
// CHECK-LABEL: define i64 @test_vrshrd_n_s64(i64 %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vrshrd_n_u64(i64 %a) #0 {
// CHECK:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK:   ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}
18632
// Shift-right-and-accumulate tests (vsra/vrsra): shift %b right by the
// immediate (plain ashr/lshr, or the srshl/urshl rounding intrinsic with
// negated amount), then add the result to %a.
// CHECK-LABEL: define i64 @test_vsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK:   [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK:   ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// Unsigned shift by the full width contributes 0, so the accumulate
// folds to just returning %a.
// CHECK-LABEL: define i64 @test_vsrad_n_u64_2(i64 %a, i64 %b) #0 {
// CHECK:   ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {

  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// Rounding accumulate variants: rounding shift (left by -n) then add.
// CHECK-LABEL: define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK:   [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK:   ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}
18719
// Shift-left-by-immediate tests (vshl_n): both signed and unsigned forms
// lower to a plain IR shl (sign only matters for right shifts).
// CHECK-LABEL: define i64 @test_vshld_n_s64(i64 %a) #0 {
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK:   ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}
// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshld_n_u64(i64 %a) #0 {
// CHECK:   [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK:   ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}
18750
// Saturating shift-left-by-immediate tests (vqshl_n). Notes on the IR
// shapes pinned below:
//  - 8/16-bit scalar forms have no scalar intrinsic and are emulated via
//    the v8i8/v4i16 vector intrinsic on lane 0; the shift-amount vector
//    only defines lane 0 (the rest are undef).
//  - 32/64-bit scalar forms use the dedicated i32/i64 intrinsics.
//  - Vector forms with shift amount 0 pass a zeroinitializer splat.
// CHECK-LABEL: define i8 @test_vqshlb_n_s8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_s32(i32 %a) #0 {
// CHECK:   [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}

// Vector forms, signed, shift amount 0 (zeroinitializer splat).
// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}

// Vector forms, unsigned (uqshl).
// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK:   ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK:   ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK:   ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}

// 64x1 vector forms with a nonzero shift amount.
// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// Unsigned scalar forms (same lane-0 emulation for 8/16-bit widths).
// CHECK-LABEL: define i8 @test_vqshlb_n_u8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_u16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_u32(i32 %a) #0 {
// CHECK:   [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_u64(i64 %a) #0 {
// CHECK:   [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}
18950
// Signed-to-unsigned saturating shift-left tests (vqshlu_n, sqshlu).
// Same lowering pattern as vqshl_n: lane-0 vector emulation for 8/16-bit
// scalars, dedicated i32/i64 intrinsics for the wider scalars.
// CHECK-LABEL: define i8 @test_vqshlub_n_s8(i8 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK:   [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshluh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshlus_n_s32(i32 %a) #0 {
// CHECK:   [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK:   ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshlud_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK:   ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// Vector form: signed input, unsigned result type (per ACLE).
// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}
18991
// Shift-right-insert (SRI) and shift-left-insert (SLI) by immediate, scalar
// (d-register, i64 in/out via <1 x i64> bitcasts) and vector <1 x i64> forms,
// for both signed and unsigned element types.  Signed and unsigned variants
// intentionally lower to the same llvm.aarch64.neon.vsri/vsli intrinsics.
// CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK:   [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK:   ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK:   [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK:   ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK:   [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK:   ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 {
// CHECK:   [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK:   [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK:   [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK:   ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}
19075
// Scalar saturating shift-right-narrow by immediate, in all four flavors:
// SQSHRN/UQSHRN (plain), SQRSHRN/UQRSHRN (rounding), SQSHRUN (signed->unsigned)
// and SQRSHRUN (rounding signed->unsigned).  Each test uses the maximum legal
// immediate (the source element width).  The h/s variants lower through a
// one-lane-used vector intrinsic (insert, call, extract); the d variants have
// dedicated scalar i32-result intrinsics.
// CHECK-LABEL: define i8 @test_vqshrnh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshrns_n_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrnd_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqshrnh_n_u16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshrns_n_u32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrnd_n_u64(i64 %a) #0 {
// CHECK:   [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrnh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshrns_n_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrnd_n_s64(i64 %a) #0 {
// CHECK:   [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrnh_n_u16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK:   ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshrns_n_u32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK:   ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrnd_n_u64(i64 %a) #0 {
// CHECK:   [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqshrunh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshruns_n_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrund_n_s64(i64 %a) #0 {
// CHECK:   [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrunh_n_s16(i16 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK:   [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK:   ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshruns_n_s32(i32 %a) #0 {
// CHECK:   [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK:   [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK:   ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrund_n_s64(i64 %a) #0 {
// CHECK:   [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK:   ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}
19225
// Scalar fixed-point <-> floating-point conversions with an immediate number
// of fractional bits (SCVTF/UCVTF and FCVTZS/FCVTZU, #fbits form), exercising
// both the minimum (1) and maximum (32/64) legal fbits immediates.
// CHECK-LABEL: define float @test_vcvts_n_f32_s32(i32 %a) #0 {
// CHECK:   [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK:   ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_s64(i64 %a) #0 {
// CHECK:   [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK:   ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: define float @test_vcvts_n_f32_u32(i32 %a) #0 {
// CHECK:   [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK:   ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_u64(i64 %a) #0 {
// CHECK:   [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK:   ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: define i32 @test_vcvts_n_s32_f32(float %a) #0 {
// CHECK:   [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK:   ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_s64_f64(double %a) #0 {
// CHECK:   [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK:   ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: define i32 @test_vcvts_n_u32_f32(float %a) #0 {
// CHECK:   [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK:   ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_u64_f64(double %a) #0 {
// CHECK:   [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK:   ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
19281
// vreinterpret to int8x8_t from every other 64-bit vector type: a pure
// type-level reinterpretation that must compile to a single bitcast (or to
// nothing when source and destination share the same IR type, e.g. u8/p8).
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}
19370
// vreinterpret to int16x4_t from every other 64-bit vector type: single
// bitcast, or a no-op when the IR types already match (u16/p16 sources).
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}
19459
// vreinterpret to int32x2_t from every other 64-bit vector type: single
// bitcast, or a no-op when the IR types already match (u32 source).
// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 {
// CHECK:   ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}
19549
// vreinterpret to int64x1_t from every other 64-bit vector type: single
// bitcast, or a no-op when the IR types already match (u64/p64 sources).
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}
19638
// vreinterpret to uint8x8_t from every other 64-bit vector type: single
// bitcast, or a no-op when the IR types already match (s8/p8 sources).
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}
19727
// vreinterpret_u16_* group: reinterpret each 64-bit vector type as uint16x4_t.
// s16 and p16 already map to <4 x i16> in IR, so those cases are value
// returns; everything else is a single bitcast.
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}
19816
// vreinterpret_u32_* group: reinterpret each 64-bit vector type as uint32x2_t.
// Only s32 shares the <2 x i32> IR type (value return); all others bitcast.
// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK:   ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
19906
// vreinterpret_u64_* group: reinterpret each 64-bit vector type as uint64x1_t.
// s64 and p64 share the <1 x i64> IR type (value return); all others bitcast.
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}
19995
// vreinterpret_f16_* group: reinterpret each 64-bit vector type as
// float16x4_t (<4 x half> in IR). No source shares that IR type, so every
// case lowers to a single bitcast.
// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}
20086
// vreinterpret_f32_* group: reinterpret each 64-bit vector type as
// float32x2_t (<2 x float> in IR). Every case is a single bitcast.
// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}
20177
// vreinterpret_f64_* group: reinterpret each 64-bit vector type as
// float64x1_t (<1 x double> in IR, AArch64-only). Every case is a single
// bitcast.
// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK:   ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}
20268
// vreinterpret_p8_* group: reinterpret each 64-bit vector type as poly8x8_t.
// s8 and u8 already map to <8 x i8> in IR (value return); the rest bitcast.
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}
20357
// vreinterpret_p16_* group: reinterpret each 64-bit vector type as
// poly16x4_t. s16 and u16 share the <4 x i16> IR type (value return); the
// rest lower to a single bitcast.
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}
20446
// vreinterpret_p64_*: reinterpret a 64-bit NEON vector as poly64x1_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <1 x i64> (int64x1_t / uint64x1_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u64(<1 x i64> %a) #0 {
// CHECK:   ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f64(<1 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p8(<8 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}
20535
// vreinterpretq_s8_*: reinterpret a 128-bit NEON vector as int8x16_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <16 x i8> (uint8x16_t / poly8x16_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}
20624
// vreinterpretq_s16_*: reinterpret a 128-bit NEON vector as int16x8_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <8 x i16> (uint16x8_t / poly16x8_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}
20713
// vreinterpretq_s32_*: reinterpret a 128-bit NEON vector as int32x4_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <4 x i32> (uint32x4_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}
20803
// vreinterpretq_s64_*: reinterpret a 128-bit NEON vector as int64x2_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <2 x i64> (uint64x2_t / poly64x2_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}
20892
// vreinterpretq_u8_*: reinterpret a 128-bit NEON vector as uint8x16_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <16 x i8> (int8x16_t / poly8x16_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}
20981
// vreinterpretq_u16_*: reinterpret a 128-bit NEON vector as uint16x8_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <8 x i16> (int16x8_t / poly16x8_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}
21070
// vreinterpretq_u32_*: reinterpret a 128-bit NEON vector as uint32x4_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <4 x i32> (int32x4_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
// CHECK:   ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}
21160
// vreinterpretq_u64_*: reinterpret a 128-bit NEON vector as uint64x2_t.
// Each call lowers to a single bitcast, or to a plain pass-through return
// when the source already lowers to <2 x i64> (int64x2_t).
// Generated FileCheck test — keep code and IR expectations in sync
// (regenerate rather than hand-edit).
// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}
21229
21230// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
21231// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
21232// CHECK:   ret <2 x i64> [[TMP0]]
21233uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
21234  return vreinterpretq_u64_p8(a);
21235}
21236
21237// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
21238// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
21239// CHECK:   ret <2 x i64> [[TMP0]]
21240uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
21241  return vreinterpretq_u64_p16(a);
21242}
21243
21244// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p64(<2 x i64> %a) #0 {
21245// CHECK:   ret <2 x i64> %a
21246uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
21247  return vreinterpretq_u64_p64(a);
21248}
21249
// vreinterpretq_f16_*: reinterpret every other 128-bit NEON type as
// float16x8_t.  All sources have a distinct IR element type, so each test
// expects exactly one bitcast to <8 x half>.
// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}
21340
// vreinterpretq_f32_*: reinterpret every other 128-bit NEON type as
// float32x4_t.  Each case is a single bitcast to <4 x float>.
// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}
21431
// vreinterpretq_f64_*: reinterpret every other 128-bit NEON type as
// float64x2_t (AArch64-only type).  Each case is a single bitcast to
// <2 x double>.
// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK:   ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}
21522
// vreinterpretq_p8_*: reinterpret every other 128-bit NEON type as
// poly8x16_t.  Sources that already lower to <16 x i8> (s8, u8) must produce
// no cast at all; everything else is one bitcast.
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}
21611
// vreinterpretq_p16_*: reinterpret every other 128-bit NEON type as
// poly16x8_t.  Sources already lowering to <8 x i16> (s16, u16) must be
// no-ops; everything else is one bitcast.
// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}
21700
// vreinterpretq_p64_*: reinterpret every other 128-bit NEON type as
// poly64x2_t (AArch64-only type).  s64 and u64 already lower to <2 x i64>,
// so those two cases must emit no cast; everything else is one bitcast.
// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u64(<2 x i64> %a) #0 {
// CHECK:   ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f16(<8 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p8(<16 x i8> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}
21789
// Scalar floating-point absolute difference: vabds_f32/vabdd_f64 must lower
// to the llvm.aarch64.sisd.fabd scalar intrinsic (FABD instruction).
// CHECK-LABEL: define float @test_vabds_f32(float %a, float %b) #0 {
// CHECK:   [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4
// CHECK:   ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: define double @test_vabdd_f64(double %a, double %b) #0 {
// CHECK:   [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4
// CHECK:   ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}
21803
// Mixed-signedness saturating adds on 64-bit D registers.  vuqadd_s64 adds an
// unsigned vector into a signed accumulator (llvm.aarch64.neon.suqadd);
// vsqadd_u64 adds a signed vector into an unsigned accumulator
// (llvm.aarch64.neon.usqadd).  The <8 x i8> round-trip bitcasts come from the
// generic NEON codegen path and cancel out after mem2reg.
// CHECK-LABEL: define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]]) #4
// CHECK:   ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]]) #4
// CHECK:   ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}
21825
// vsqadd(q)_u8/u16/u32/u64: unsigned saturating add of a signed operand,
// lowered to llvm.aarch64.neon.usqadd at each element width.  The i8 variants
// call the intrinsic directly; wider element types go through the generic
// byte-vector bitcast round-trip first.
// CHECK-LABEL: define <8 x i8> @test_vsqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK:   ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK:   ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]]) #4
// CHECK:   ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]]) #4
// CHECK:   ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]]) #4
// CHECK:   ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]]) #4
// CHECK:   ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]]) #4
// CHECK:   ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}
21894
21895// CHECK-LABEL: define <1 x i64> @test_vabs_s64(<1 x i64> %a) #0 {
21896// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21897// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21898// CHECK:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]]) #4
21899// CHECK:   ret <1 x i64> [[VABS1_I]]
// Single-lane integer absolute value; lowers to llvm.aarch64.neon.abs.v1i64 (see CHECK lines above).
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}
21903
21904// CHECK-LABEL: define <1 x i64> @test_vqabs_s64(<1 x i64> %a) #0 {
21905// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21906// CHECK:   [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21907// CHECK:   [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]]) #4
21908// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
21909// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <1 x i64>
21910// CHECK:   ret <1 x i64> [[TMP1]]
// Saturating absolute value; lowers to llvm.aarch64.neon.sqabs.v1i64 (see CHECK lines above).
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}
21914
21915// CHECK-LABEL: define <1 x i64> @test_vqneg_s64(<1 x i64> %a) #0 {
21916// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
21917// CHECK:   [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
21918// CHECK:   [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]]) #4
21919// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
21920// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <1 x i64>
21921// CHECK:   ret <1 x i64> [[TMP1]]
// Saturating negate; lowers to llvm.aarch64.neon.sqneg.v1i64 (see CHECK lines above).
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}
21925
21926// CHECK-LABEL: define <1 x i64> @test_vneg_s64(<1 x i64> %a) #0 {
21927// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
21928// CHECK:   ret <1 x i64> [[SUB_I]]
// Plain negate emits a generic IR `sub <1 x i64> zeroinitializer, %a` rather than a target intrinsic (see CHECK lines above).
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}
21932
21933// CHECK-LABEL: define float @test_vaddv_f32(<2 x float> %a) #0 {
21934// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
21935// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
21936// CHECK:   [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[TMP1]]) #4
21937// CHECK:   ret float [[VADDV_F32_I]]
// Horizontal FP add across lanes; lowers to llvm.aarch64.neon.faddv.f32.v2f32 (see CHECK lines above).
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}
21941
21942// CHECK-LABEL: define float @test_vaddvq_f32(<4 x float> %a) #0 {
21943// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
21944// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
21945// CHECK:   [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[TMP1]]) #4
21946// CHECK:   ret float [[VADDVQ_F32_I]]
// 128-bit variant; lowers to llvm.aarch64.neon.faddv.f32.v4f32 (see CHECK lines above).
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}
21950
21951// CHECK-LABEL: define double @test_vaddvq_f64(<2 x double> %a) #0 {
21952// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21953// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21954// CHECK:   [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[TMP1]]) #4
21955// CHECK:   ret double [[VADDVQ_F64_I]]
// Double-precision variant; lowers to llvm.aarch64.neon.faddv.f64.v2f64 (see CHECK lines above).
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}
21959
21960// CHECK-LABEL: define float @test_vmaxv_f32(<2 x float> %a) #0 {
21961// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
21962// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
21963// CHECK:   [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
21964// CHECK:   ret float [[VMAXV_F32_I]]
// Horizontal FP max; lowers to llvm.aarch64.neon.fmaxv.f32.v2f32 (see CHECK lines above).
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}
21968
21969// CHECK-LABEL: define double @test_vmaxvq_f64(<2 x double> %a) #0 {
21970// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21971// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21972// CHECK:   [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
21973// CHECK:   ret double [[VMAXVQ_F64_I]]
// Horizontal FP max, double lanes; lowers to llvm.aarch64.neon.fmaxv.f64.v2f64 (see CHECK lines above).
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}
21977
21978// CHECK-LABEL: define float @test_vminv_f32(<2 x float> %a) #0 {
21979// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
21980// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
21981// CHECK:   [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
21982// CHECK:   ret float [[VMINV_F32_I]]
// Horizontal FP min; lowers to llvm.aarch64.neon.fminv.f32.v2f32 (see CHECK lines above).
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}
21986
21987// CHECK-LABEL: define double @test_vminvq_f64(<2 x double> %a) #0 {
21988// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21989// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21990// CHECK:   [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
21991// CHECK:   ret double [[VMINVQ_F64_I]]
// Horizontal FP min, double lanes; lowers to llvm.aarch64.neon.fminv.f64.v2f64 (see CHECK lines above).
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}
21995
21996// CHECK-LABEL: define double @test_vmaxnmvq_f64(<2 x double> %a) #0 {
21997// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
21998// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
21999// CHECK:   [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
22000// CHECK:   ret double [[VMAXNMVQ_F64_I]]
// Horizontal maxNum (NaN-propagation-aware); lowers to llvm.aarch64.neon.fmaxnmv.f64.v2f64 (see CHECK lines above).
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}
22004
22005// CHECK-LABEL: define float @test_vmaxnmv_f32(<2 x float> %a) #0 {
22006// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
22007// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
22008// CHECK:   [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
22009// CHECK:   ret float [[VMAXNMV_F32_I]]
// 64-bit variant; lowers to llvm.aarch64.neon.fmaxnmv.f32.v2f32 (see CHECK lines above).
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}
22013
22014// CHECK-LABEL: define double @test_vminnmvq_f64(<2 x double> %a) #0 {
22015// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
22016// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
22017// CHECK:   [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
22018// CHECK:   ret double [[VMINNMVQ_F64_I]]
// Horizontal minNum; lowers to llvm.aarch64.neon.fminnmv.f64.v2f64 (see CHECK lines above).
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}
22022
22023// CHECK-LABEL: define float @test_vminnmv_f32(<2 x float> %a) #0 {
22024// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
22025// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
22026// CHECK:   [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
22027// CHECK:   ret float [[VMINNMV_F32_I]]
// 64-bit variant; lowers to llvm.aarch64.neon.fminnmv.f32.v2f32 (see CHECK lines above).
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}
22031
22032// CHECK-LABEL: define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
22033// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22034// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
22035// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22036// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
22037// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
22038// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
22039// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
22040// CHECK:   ret <2 x i64> [[TMP2]]
// Pairwise add; lowers to llvm.aarch64.neon.addp.v2i64 (see CHECK lines above).
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}
22044
22045// CHECK-LABEL: define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
22046// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22047// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
22048// CHECK:   [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22049// CHECK:   [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
22050// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
22051// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
22052// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
22053// CHECK:   ret <2 x i64> [[TMP2]]
// Unsigned variant shares the sign-agnostic llvm.aarch64.neon.addp.v2i64 lowering (see CHECK lines above).
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}
22057
22058// CHECK-LABEL: define i64 @test_vpaddd_u64(<2 x i64> %a) #0 {
22059// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22060// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22061// CHECK:   [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
22062// CHECK:   ret i64 [[VPADDD_U64_I]]
// Scalar pairwise add is emitted as the across-lanes reduction llvm.aarch64.neon.uaddv.i64.v2i64 (see CHECK lines above).
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}
22066
22067// CHECK-LABEL: define i64 @test_vaddvq_s64(<2 x i64> %a) #0 {
22068// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22069// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22070// CHECK:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
22071// CHECK:   ret i64 [[VADDVQ_S64_I]]
// Signed add-across-lanes; lowers to llvm.aarch64.neon.saddv.i64.v2i64 (see CHECK lines above).
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}
22075
22076// CHECK-LABEL: define i64 @test_vaddvq_u64(<2 x i64> %a) #0 {
22077// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
22078// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
22079// CHECK:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
22080// CHECK:   ret i64 [[VADDVQ_U64_I]]
// Unsigned add-across-lanes; lowers to llvm.aarch64.neon.uaddv.i64.v2i64 (see CHECK lines above).
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}
22084
22085// CHECK-LABEL: define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) #0 {
22086// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, %b
22087// CHECK:   ret <1 x double> [[ADD_I]]
// v1f64 add is emitted as a plain IR fadd, no target intrinsic (see CHECK lines above).
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}
22091
22092// CHECK-LABEL: define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) #0 {
22093// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %a, %b
22094// CHECK:   ret <1 x double> [[MUL_I]]
// v1f64 multiply is emitted as a plain IR fmul (see CHECK lines above).
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}
22098
22099// CHECK-LABEL: define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) #0 {
22100// CHECK:   [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
22101// CHECK:   ret <1 x double> [[DIV_I]]
// v1f64 divide is emitted as a plain IR fdiv (see CHECK lines above).
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}
22105
22106// CHECK-LABEL: define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22107// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
22108// CHECK:   [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
22109// CHECK:   ret <1 x double> [[ADD_I]]
// Multiply-accumulate expands to separate fmul + fadd in IR (contraction left to -ffp-contract; see CHECK lines above).
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}
22113
22114// CHECK-LABEL: define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22115// CHECK:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
22116// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
22117// CHECK:   ret <1 x double> [[SUB_I]]
// Multiply-subtract expands to fmul + fsub in IR (see CHECK lines above).
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}
22121
22122// CHECK-LABEL: define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22123// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22124// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22125// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
22126// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22127// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22128// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
22129// CHECK:   [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
22130// CHECK:   ret <1 x double> [[TMP6]]
// Fused multiply-add maps to llvm.fma.v1f64 with the addend (a) as the third operand (see CHECK lines above).
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}
22134
22135// CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
22136// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
22137// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22138// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
22139// CHECK:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
22140// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22141// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22142// CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
22143// CHECK:   [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
22144// CHECK:   ret <1 x double> [[TMP6]]
// Fused multiply-subtract: b is negated via fsub from -0.0, then llvm.fma.v1f64 (see CHECK lines above).
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}
22148
22149// CHECK-LABEL: define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) #0 {
22150// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> %a, %b
22151// CHECK:   ret <1 x double> [[SUB_I]]
// v1f64 subtract is emitted as a plain IR fsub (see CHECK lines above).
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}
22155
22156// CHECK-LABEL: define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) #0 {
22157// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22158// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22159// CHECK:   [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22160// CHECK:   [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22161// CHECK:   [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]]) #4
22162// CHECK:   ret <1 x double> [[VABD2_I]]
// FP absolute difference; lowers to llvm.aarch64.neon.fabd.v1f64 (see CHECK lines above).
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}
22166
22167// CHECK-LABEL: define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) #0 {
22168// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22169// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22170// CHECK:   [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22171// CHECK:   [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22172// CHECK:   [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]]) #4
22173// CHECK:   ret <1 x double> [[VMAX2_I]]
// FP max; lowers to llvm.aarch64.neon.fmax.v1f64 (see CHECK lines above).
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}
22177
22178// CHECK-LABEL: define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) #0 {
22179// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22180// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22181// CHECK:   [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22182// CHECK:   [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22183// CHECK:   [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]]) #4
22184// CHECK:   ret <1 x double> [[VMIN2_I]]
// FP min; lowers to llvm.aarch64.neon.fmin.v1f64 (see CHECK lines above).
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}
22188
22189// CHECK-LABEL: define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) #0 {
22190// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22191// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22192// CHECK:   [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22193// CHECK:   [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22194// CHECK:   [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]]) #4
22195// CHECK:   ret <1 x double> [[VMAXNM2_I]]
// maxNum semantics; lowers to llvm.aarch64.neon.fmaxnm.v1f64 (see CHECK lines above).
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}
22199
22200// CHECK-LABEL: define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) #0 {
22201// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22202// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22203// CHECK:   [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22204// CHECK:   [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22205// CHECK:   [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]]) #4
22206// CHECK:   ret <1 x double> [[VMINNM2_I]]
// minNum semantics; lowers to llvm.aarch64.neon.fminnm.v1f64 (see CHECK lines above).
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}
22210
22211// CHECK-LABEL: define <1 x double> @test_vabs_f64(<1 x double> %a) #0 {
22212// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22213// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22214// CHECK:   [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]]) #4
22215// CHECK:   ret <1 x double> [[VABS1_I]]
// FP absolute value uses the generic llvm.fabs.v1f64, not a NEON intrinsic (see CHECK lines above).
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}
22219
22220// CHECK-LABEL: define <1 x double> @test_vneg_f64(<1 x double> %a) #0 {
22221// CHECK:   [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
22222// CHECK:   ret <1 x double> [[SUB_I]]
// FP negate is emitted as fsub from -0.0, which preserves signed-zero behavior (see CHECK lines above).
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}
22226
22227// CHECK-LABEL: define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) #0 {
22228// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22229// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22230// CHECK:   [[TMP2:%.*]] = fptosi <1 x double> [[TMP1]] to <1 x i64>
22231// CHECK:   ret <1 x i64> [[TMP2]]
// Round-toward-zero convert uses a plain IR fptosi (see CHECK lines above).
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}
22235
22236// CHECK-LABEL: define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) #0 {
22237// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22238// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22239// CHECK:   [[TMP2:%.*]] = fptoui <1 x double> [[TMP1]] to <1 x i64>
22240// CHECK:   ret <1 x i64> [[TMP2]]
// Unsigned variant uses a plain IR fptoui (see CHECK lines above).
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}
22244
22245// CHECK-LABEL: define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) #0 {
22246// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22247// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22248// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
22249// CHECK:   ret <1 x i64> [[VCVTN1_I]]
// Round-to-nearest-even convert; lowers to llvm.aarch64.neon.fcvtns.v1i64.v1f64 (see CHECK lines above).
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}
22253
22254// CHECK-LABEL: define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) #0 {
22255// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22256// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22257// CHECK:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
22258// CHECK:   ret <1 x i64> [[VCVTN1_I]]
// Unsigned round-to-nearest-even; lowers to llvm.aarch64.neon.fcvtnu.v1i64.v1f64 (see CHECK lines above).
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}
22262
22263// CHECK-LABEL: define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) #0 {
22264// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22265// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22266// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
22267// CHECK:   ret <1 x i64> [[VCVTP1_I]]
// Round-toward-plus-infinity; lowers to llvm.aarch64.neon.fcvtps.v1i64.v1f64 (see CHECK lines above).
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}
22271
22272// CHECK-LABEL: define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) #0 {
22273// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22274// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22275// CHECK:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
22276// CHECK:   ret <1 x i64> [[VCVTP1_I]]
// Unsigned round-toward-plus-infinity; lowers to llvm.aarch64.neon.fcvtpu.v1i64.v1f64 (see CHECK lines above).
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}
22280
22281// CHECK-LABEL: define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) #0 {
22282// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22283// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22284// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
22285// CHECK:   ret <1 x i64> [[VCVTM1_I]]
// Round-toward-minus-infinity; lowers to llvm.aarch64.neon.fcvtms.v1i64.v1f64 (see CHECK lines above).
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}
22289
22290// CHECK-LABEL: define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) #0 {
22291// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22292// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22293// CHECK:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
22294// CHECK:   ret <1 x i64> [[VCVTM1_I]]
// Unsigned round-toward-minus-infinity; lowers to llvm.aarch64.neon.fcvtmu.v1i64.v1f64 (see CHECK lines above).
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}
22298
22299// CHECK-LABEL: define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) #0 {
22300// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22301// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22302// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
22303// CHECK:   ret <1 x i64> [[VCVTA1_I]]
// Round-to-nearest-with-ties-away; lowers to llvm.aarch64.neon.fcvtas.v1i64.v1f64 (see CHECK lines above).
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}
22307
22308// CHECK-LABEL: define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) #0 {
22309// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22310// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22311// CHECK:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
22312// CHECK:   ret <1 x i64> [[VCVTA1_I]]
// Unsigned ties-away variant; lowers to llvm.aarch64.neon.fcvtau.v1i64.v1f64 (see CHECK lines above).
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}
22316
22317// CHECK-LABEL: define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) #0 {
22318// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22319// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22320// CHECK:   [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
22321// CHECK:   ret <1 x double> [[VCVT_I]]
// Signed int-to-FP convert uses a plain IR sitofp (see CHECK lines above).
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}
22325
22326// CHECK-LABEL: define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) #0 {
22327// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22328// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22329// CHECK:   [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
22330// CHECK:   ret <1 x double> [[VCVT_I]]
// Unsigned int-to-FP convert uses a plain IR uitofp (see CHECK lines above).
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}
22334
22335// CHECK-LABEL: define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) #0 {
22336// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22337// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22338// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
22339// CHECK:   ret <1 x i64> [[VCVT_N1]]
// Fixed-point convert with the maximum fraction-bit count (64); lowers to llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64 (see CHECK lines above).
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}
22343
22344// CHECK-LABEL: define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) #0 {
22345// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22346// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22347// CHECK:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
22348// CHECK:   ret <1 x i64> [[VCVT_N1]]
// Unsigned fixed-point convert, 64 fraction bits; lowers to llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64 (see CHECK lines above).
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}
22352
22353// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) #0 {
22354// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22355// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22356// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
22357// CHECK:   ret <1 x double> [[VCVT_N1]]
// Signed fixed-point to FP, 64 fraction bits; lowers to llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64 (see CHECK lines above).
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}
22361
22362// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) #0 {
22363// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
22364// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
22365// CHECK:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
22366// CHECK:   ret <1 x double> [[VCVT_N1]]
// Unsigned fixed-point to FP, 64 fraction bits; lowers to llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64 (see CHECK lines above).
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}
22370
22371// CHECK-LABEL: define <1 x double> @test_vrndn_f64(<1 x double> %a) #0 {
22372// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22373// CHECK:   [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22374// CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> [[VRNDN_I]]) #4
22375// CHECK:   ret <1 x double> [[VRNDN1_I]]
// Round to nearest even; lowers to the target intrinsic llvm.aarch64.neon.frintn.v1f64 (see CHECK lines above).
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}
22379
22380// CHECK-LABEL: define <1 x double> @test_vrnda_f64(<1 x double> %a) #0 {
22381// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22382// CHECK:   [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22383// CHECK:   [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]]) #4
22384// CHECK:   ret <1 x double> [[VRNDA1_I]]
// Round ties away from zero; maps to the generic llvm.round.v1f64 (see CHECK lines above).
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}
22388
22389// CHECK-LABEL: define <1 x double> @test_vrndp_f64(<1 x double> %a) #0 {
22390// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22391// CHECK:   [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22392// CHECK:   [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]]) #4
22393// CHECK:   ret <1 x double> [[VRNDP1_I]]
// Round toward plus infinity; maps to the generic llvm.ceil.v1f64 (see CHECK lines above).
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}
22397
22398// CHECK-LABEL: define <1 x double> @test_vrndm_f64(<1 x double> %a) #0 {
22399// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22400// CHECK:   [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22401// CHECK:   [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]]) #4
22402// CHECK:   ret <1 x double> [[VRNDM1_I]]
// Round toward minus infinity; maps to the generic llvm.floor.v1f64 (see CHECK lines above).
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}
22406
22407// CHECK-LABEL: define <1 x double> @test_vrndx_f64(<1 x double> %a) #0 {
22408// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22409// CHECK:   [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22410// CHECK:   [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]]) #4
22411// CHECK:   ret <1 x double> [[VRNDX1_I]]
// Round using current mode, raising inexact; maps to the generic llvm.rint.v1f64 (see CHECK lines above).
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}
22415
22416// CHECK-LABEL: define <1 x double> @test_vrnd_f64(<1 x double> %a) #0 {
22417// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22418// CHECK:   [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22419// CHECK:   [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]]) #4
22420// CHECK:   ret <1 x double> [[VRNDZ1_I]]
// Round toward zero; maps to the generic llvm.trunc.v1f64 (see CHECK lines above).
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}
22424
22425// CHECK-LABEL: define <1 x double> @test_vrndi_f64(<1 x double> %a) #0 {
22426// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22427// CHECK:   [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22428// CHECK:   [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_I]]) #4
22429// CHECK:   ret <1 x double> [[VRNDI1_I]]
// Round using current mode without raising inexact; maps to the generic llvm.nearbyint.v1f64 (see CHECK lines above).
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}
22433
22434// CHECK-LABEL: define <1 x double> @test_vrsqrte_f64(<1 x double> %a) #0 {
22435// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22436// CHECK:   [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22437// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]]) #4
22438// CHECK:   ret <1 x double> [[VRSQRTE_V1_I]]
// Reciprocal square-root estimate; lowers to llvm.aarch64.neon.frsqrte.v1f64 (see CHECK lines above).
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}
22442
22443// CHECK-LABEL: define <1 x double> @test_vrecpe_f64(<1 x double> %a) #0 {
22444// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22445// CHECK:   [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22446// CHECK:   [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]]) #4
22447// CHECK:   ret <1 x double> [[VRECPE_V1_I]]
// Reciprocal estimate; lowers to llvm.aarch64.neon.frecpe.v1f64 (see CHECK lines above).
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}
22451
22452// CHECK-LABEL: define <1 x double> @test_vsqrt_f64(<1 x double> %a) #0 {
22453// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22454// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22455// CHECK:   [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP1]]) #4
22456// CHECK:   ret <1 x double> [[VSQRT_I]]
// Square root maps to the generic llvm.sqrt.v1f64, not a NEON intrinsic (see CHECK lines above).
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}
22460
22461// CHECK-LABEL: define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) #0 {
22462// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22463// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22464// CHECK:   [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22465// CHECK:   [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22466// CHECK:   [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]]) #4
22467// CHECK:   [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8>
22468// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <1 x double>
22469// CHECK:   ret <1 x double> [[TMP2]]
// Newton-Raphson reciprocal step; lowers to llvm.aarch64.neon.frecps.v1f64 (see CHECK lines above).
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}
22473
22474// CHECK-LABEL: define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) #0 {
22475// CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
22476// CHECK:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
22477// CHECK:   [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
22478// CHECK:   [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
22479// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]]) #4
22480// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
22481// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <1 x double>
22482// CHECK:   ret <1 x double> [[TMP2]]
// Newton-Raphson reciprocal square-root step; lowers to llvm.aarch64.neon.frsqrts.v1f64 (see CHECK lines above).
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}
22486
// Verify that the signed across-vector minimum reduction vminv_s32
// lowers to llvm.aarch64.neon.sminv.i32.v2i32, returning a scalar i32.
// CHECK-LABEL: define i32 @test_vminv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}
22495
// Verify that the unsigned variant vminv_u32 selects the unsigned
// reduction intrinsic (uminv) rather than the signed one.
// CHECK-LABEL: define i32 @test_vminv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}
22504
// Verify that the signed across-vector maximum reduction vmaxv_s32
// lowers to llvm.aarch64.neon.smaxv.i32.v2i32.
// CHECK-LABEL: define i32 @test_vmaxv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}
22513
// Verify that the unsigned variant vmaxv_u32 selects the unsigned
// reduction intrinsic (umaxv) rather than the signed one.
// CHECK-LABEL: define i32 @test_vmaxv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}
22522
// Verify that the signed across-vector add reduction vaddv_s32 lowers to
// llvm.aarch64.neon.saddv.i32.v2i32, returning the sum as a scalar i32.
// CHECK-LABEL: define i32 @test_vaddv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}
22531
// Verify that the unsigned variant vaddv_u32 selects the unsigned add
// reduction intrinsic (uaddv) rather than the signed one.
// CHECK-LABEL: define i32 @test_vaddv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}
22540
// Verify that the widening signed add reduction vaddlv_s32 lowers to
// llvm.aarch64.neon.saddlv.i64.v2i32 — note the result widens to i64,
// unlike the non-widening vaddv_s32 above.
// CHECK-LABEL: define i64 @test_vaddlv_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}
22549
// Verify that the widening unsigned add reduction vaddlv_u32 lowers to
// llvm.aarch64.neon.uaddlv.i64.v2i32, widening the result to i64.
// CHECK-LABEL: define i64 @test_vaddlv_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK:   ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}
22558