// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -target-abi darwinpcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s

// CHECK: define signext i8 @f0()
char f0(void) {
  return 0;
}

// Struct as return type. Aggregates <= 16 bytes are passed directly and
// rounded up to a multiple of 8 bytes.
// CHECK: define i64 @f1()
struct s1 { char f0; };
struct s1 f1(void) {}
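
// Illustrative sketch (not part of the checked test): a caller of f1 is
// expected to receive the coerced i64 return value and truncate it back to
// the char field, mirroring the callee-side coercion checked in f31 below.
// No IR is asserted for this helper.
char use_f1(void) {
  return f1().f0;
}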

// CHECK: define i64 @f2()
struct s2 { short f0; };
struct s2 f2(void) {}

// CHECK: define i64 @f3()
struct s3 { int f0; };
struct s3 f3(void) {}

// CHECK: define i64 @f4()
struct s4 { struct s4_0 { int f0; } f0; };
struct s4 f4(void) {}

// CHECK: define i64 @f5()
struct s5 { struct { } f0; int f1; };
struct s5 f5(void) {}

// CHECK: define i64 @f6()
struct s6 { int f0[1]; };
struct s6 f6(void) {}

// CHECK: define void @f7()
struct s7 { struct { int : 0; } f0; };
struct s7 f7(void) {}

// CHECK: define void @f8()
struct s8 { struct { int : 0; } f0[1]; };
struct s8 f8(void) {}

// CHECK: define i64 @f9()
struct s9 { int f0; int : 0; };
struct s9 f9(void) {}

// CHECK: define i64 @f10()
struct s10 { int f0; int : 0; int : 0; };
struct s10 f10(void) {}

// CHECK: define i64 @f11()
struct s11 { int : 0; int f0; };
struct s11 f11(void) {}

// CHECK: define i64 @f12()
union u12 { char f0; short f1; int f2; };
union u12 f12(void) {}

// Homogeneous Aggregate as return type will be passed directly.
// CHECK: define %struct.s13 @f13()
struct s13 { float f0; };
struct s13 f13(void) {}
// CHECK: define %union.u14 @f14()
union u14 { float f0; };
union u14 f14(void) {}
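
// Illustrative sketch (unchecked): an HFA such as s13 is returned directly
// in floating-point registers, so a caller can read the float member with
// no integer coercion. No IR is asserted for this helper.
float use_f13(void) {
  return f13().f0;
}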

// CHECK: define void @f15()
void f15(struct s7 a0) {}

// CHECK: define void @f16()
void f16(struct s8 a0) {}

// CHECK: define i64 @f17()
struct s17 { short f0 : 13; char f1 : 4; };
struct s17 f17(void) {}

// CHECK: define i64 @f18()
struct s18 { short f0; char f1 : 4; };
struct s18 f18(void) {}

// CHECK: define i64 @f19()
struct s19 { int f0; struct s8 f1; };
struct s19 f19(void) {}

// CHECK: define i64 @f20()
struct s20 { struct s8 f1; int f0; };
struct s20 f20(void) {}

// CHECK: define i64 @f21()
struct s21 { struct {} f1; int f0 : 4; };
struct s21 f21(void) {}

// CHECK: define i64 @f22()
// CHECK: define i64 @f23()
// CHECK: define i64 @f24()
// CHECK: define i128 @f25()
// CHECK: define { float, float } @f26()
// CHECK: define { double, double } @f27()
_Complex char       f22(void) {}
_Complex short      f23(void) {}
_Complex int        f24(void) {}
_Complex long long  f25(void) {}
_Complex float      f26(void) {}
_Complex double     f27(void) {}

// CHECK: define i64 @f28()
struct s28 { _Complex char f0; };
struct s28 f28() {}

// CHECK: define i64 @f29()
struct s29 { _Complex short f0; };
struct s29 f29() {}

// CHECK: define i64 @f30()
struct s30 { _Complex int f0; };
struct s30 f30() {}

struct s31 { char x; };
void f31(struct s31 s) { }
// CHECK: define void @f31(i64 %s.coerce)
// CHECK: %s = alloca %struct.s31, align 8
// CHECK: trunc i64 %s.coerce to i8
// CHECK: store i8 %{{.*}},

struct s32 { double x; };
void f32(struct s32 s) { }
// Expand Homogeneous Aggregate.
// CHECK: @f32(double %{{.*}})

// A composite type larger than 16 bytes should be passed indirectly.
struct s33 { char buf[32*32]; };
void f33(struct s33 s) { }
// CHECK: define void @f33(%struct.s33* %s)
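
// Illustrative sketch (unchecked): for an argument larger than 16 bytes the
// caller is expected to make a temporary copy and pass its address, as the
// checked callers for s42/s43 below demonstrate. No IR is asserted for this
// helper.
void use_f33(struct s33 *p) {
  f33(*p);
}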

struct s34 { char c; };
void f34(struct s34 s);
void g34(struct s34 *s) { f34(*s); }
// CHECK: @g34(%struct.s34* %s)
// CHECK: %[[a:.*]] = load i8* %{{.*}}
// CHECK: zext i8 %[[a]] to i64
// CHECK: call void @f34(i64 %{{.*}})

/*
 * Check that va_arg accesses the stack according to ABI alignment.
 */
long long t1(int i, ...) {
    // CHECK: t1
    __builtin_va_list ap;
    __builtin_va_start(ap, i);
    // CHECK-NOT: add i32 %{{.*}} 7
    // CHECK-NOT: and i32 %{{.*}} -8
    long long ll = __builtin_va_arg(ap, long long);
    __builtin_va_end(ap);
    return ll;
}
double t2(int i, ...) {
    // CHECK: t2
    __builtin_va_list ap;
    __builtin_va_start(ap, i);
    // CHECK-NOT: add i32 %{{.*}} 7
    // CHECK-NOT: and i32 %{{.*}} -8
    double ll = __builtin_va_arg(ap, double);
    __builtin_va_end(ap);
    return ll;
}
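
// Illustrative sketch (unchecked): the CHECK-NOT lines above assert that no
// extra round-up-to-8 arithmetic is emitted when va_arg reads a long long or
// a double, since both are already naturally 8-byte aligned on the stack.
// A plain caller of these variadic functions might look like this; no IR is
// asserted for it.
long long call_t1_t2(void) {
  return t1(1, 2LL) + (long long)t2(1, 2.0);
}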

#include <arm_neon.h>

// Homogeneous Vector Aggregate as return type and argument type.
// CHECK: define %struct.int8x16x2_t @f0_0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
int8x16x2_t f0_0(int8x16_t a0, int8x16_t a1) {
  return vzipq_s8(a0, a1);
}

// Test direct vector passing.
typedef float T_float32x2 __attribute__ ((__vector_size__ (8)));
typedef float T_float32x4 __attribute__ ((__vector_size__ (16)));
typedef float T_float32x8 __attribute__ ((__vector_size__ (32)));
typedef float T_float32x16 __attribute__ ((__vector_size__ (64)));

// CHECK: define <2 x float> @f1_0(<2 x float> %{{.*}})
T_float32x2 f1_0(T_float32x2 a0) { return a0; }
// CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}})
T_float32x4 f1_1(T_float32x4 a0) { return a0; }
// A vector larger than 16 bytes is not a legal argument type and is passed
// indirectly.
// CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float>*)
T_float32x8 f1_2(T_float32x8 a0) { return a0; }
// CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>*)
T_float32x16 f1_3(T_float32x16 a0) { return a0; }
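
// Illustrative sketch (unchecked): a caller of f1_2 is expected to supply an
// sret buffer for the oversized vector result and to pass the argument by
// address. No IR is asserted for this helper.
T_float32x8 use_f1_2(T_float32x8 v) {
  return f1_2(v);
}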

// Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and
// aggregates with size > 16 bytes.
struct s35
{
   float v[4]; // Testing HFA.
} __attribute__((aligned(16)));
typedef struct s35 s35_with_align;

typedef __attribute__((neon_vector_type(4))) float float32x4_t;
float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
// CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3)
// CHECK: %s1 = alloca %struct.s35, align 16
// CHECK: %s2 = alloca %struct.s35, align 16
// CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
// CHECK: load <4 x float>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s35* %s2 to <4 x float>*
// CHECK: load <4 x float>* %[[b]], align 16
  float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
                            *(float32x4_t *)&s2);
  return v;
}

struct s36
{
   int v[4]; // Testing 16-byte aggregate.
} __attribute__((aligned(16)));
typedef struct s36 s36_with_align;

typedef __attribute__((neon_vector_type(4))) int int32x4_t;
int32x4_t f36(int i, s36_with_align s1, s36_with_align s2) {
// CHECK: define <4 x i32> @f36(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s36, align 16
// CHECK: %s2 = alloca %struct.s36, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
// CHECK: %[[a:.*]] = bitcast %struct.s36* %s1 to <4 x i32>*
// CHECK: load <4 x i32>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s36* %s2 to <4 x i32>*
// CHECK: load <4 x i32>* %[[b]], align 16
  int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
                          *(int32x4_t *)&s2);
  return v;
}

struct s37
{
   int v[18]; // Testing large aggregate.
} __attribute__((aligned(16)));
typedef struct s37 s37_with_align;

int32x4_t f37(int i, s37_with_align s1, s37_with_align s2) {
// CHECK: define <4 x i32> @f37(i32 %i, %struct.s37* %s1, %struct.s37* %s2)
// CHECK: %[[a:.*]] = bitcast %struct.s37* %s1 to <4 x i32>*
// CHECK: load <4 x i32>* %[[a]], align 16
// CHECK: %[[b:.*]] = bitcast %struct.s37* %s2 to <4 x i32>*
// CHECK: load <4 x i32>* %[[b]], align 16
  int32x4_t v = vaddq_s32(*(int32x4_t *)&s1,
                          *(int32x4_t *)&s2);
  return v;
}
s37_with_align g37;
int32x4_t caller37() {
// CHECK: caller37
// CHECK: %[[a:.*]] = alloca %struct.s37, align 16
// CHECK: %[[b:.*]] = alloca %struct.s37, align 16
// CHECK: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy
// CHECK: call <4 x i32> @f37(i32 3, %struct.s37* %[[a]], %struct.s37* %[[b]])
  return f37(3, g37, g37);
}

// rdar://problem/12648441
// Test passing structs with size <= 8, <= 16 and > 16 bytes,
// with and without an aligned(16) attribute.

// structs with size <= 8 bytes, without alignment attribute
// passed as i64 regardless of the align attribute
struct s38
{
  int i;
  short s;
};
typedef struct s38 s38_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f38(int i, s38_no_align s1, s38_no_align s2) {
// CHECK: define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 8
// CHECK: %s2 = alloca %struct.s38, align 8
// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s38_no_align g38;
s38_no_align g38_2;
int caller38() {
// CHECK: define i32 @caller38()
// CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
// CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
// CHECK: call i32 @f38(i32 3, i64 %[[a]], i64 %[[b]])
  return f38(3, g38, g38_2);
}
// passing structs on stack
__attribute__ ((noinline))
int f38_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s38_no_align s1, s38_no_align s2) {
// CHECK: define i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce)
// CHECK: %s1 = alloca %struct.s38, align 8
// CHECK: %s2 = alloca %struct.s38, align 8
// CHECK: store i64 %s1.coerce, i64* %{{.*}}, align 1
// CHECK: store i64 %s2.coerce, i64* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s38* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s38* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller38_stack() {
// CHECK: define i32 @caller38_stack()
// CHECK: %[[a:.*]] = load i64* bitcast (%struct.s38* @g38 to i64*), align 1
// CHECK: %[[b:.*]] = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 1
// CHECK: call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i64 %[[a]], i64 %[[b]])
  return f38_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g38, g38_2);
}

// structs with size <= 8 bytes, with alignment attribute
struct s39
{
  int i;
  short s;
} __attribute__((aligned(16)));
typedef struct s39 s39_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f39(int i, s39_with_align s1, s39_with_align s2) {
// CHECK: define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s39_with_align g39;
s39_with_align g39_2;
int caller39() {
// CHECK: define i32 @caller39()
// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
// CHECK: call i32 @f39(i32 3, i128 %[[a]], i128 %[[b]])
  return f39(3, g39, g39_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f39_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s39_with_align s1, s39_with_align s2) {
// CHECK: define i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s39, align 16
// CHECK: %s2 = alloca %struct.s39, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s39* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s39* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller39_stack() {
// CHECK: define i32 @caller39_stack()
// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s39* @g39 to i128*), align 1
// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 1
// CHECK: call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f39_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g39, g39_2);
}

// structs with size <= 16 bytes, without alignment attribute
struct s40
{
  int i;
  short s;
  int i2;
  short s2;
};
typedef struct s40 s40_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f40(int i, s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 8
// CHECK: %s2 = alloca %struct.s40, align 8
// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s40_no_align g40;
s40_no_align g40_2;
int caller40() {
// CHECK: define i32 @caller40()
// CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
// CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
// CHECK: call i32 @f40(i32 3, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40(3, g40, g40_2);
}
// passing structs on stack
__attribute__ ((noinline))
int f40_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
// CHECK: %s1 = alloca %struct.s40, align 8
// CHECK: %s2 = alloca %struct.s40, align 8
// CHECK: store [2 x i64] %s1.coerce, [2 x i64]* %{{.*}}, align 1
// CHECK: store [2 x i64] %s2.coerce, [2 x i64]* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s40* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s40* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller40_stack() {
// CHECK: define i32 @caller40_stack()
// CHECK: %[[a:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 1
// CHECK: %[[b:.*]] = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 1
// CHECK: call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, [2 x i64] %[[a]], [2 x i64] %[[b]])
  return f40_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g40, g40_2);
}

// structs with size <= 16 bytes, with alignment attribute
struct s41
{
  int i;
  short s;
  int i2;
  short s2;
} __attribute__((aligned(16)));
typedef struct s41 s41_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f41(int i, s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s41_with_align g41;
s41_with_align g41_2;
int caller41() {
// CHECK: define i32 @caller41()
// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
// CHECK: call i32 @f41(i32 3, i128 %[[a]], i128 %[[b]])
  return f41(3, g41, g41_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f41_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce)
// CHECK: %s1 = alloca %struct.s41, align 16
// CHECK: %s2 = alloca %struct.s41, align 16
// CHECK: store i128 %s1.coerce, i128* %{{.*}}, align 1
// CHECK: store i128 %s2.coerce, i128* %{{.*}}, align 1
// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s41* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s41* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller41_stack() {
// CHECK: define i32 @caller41_stack()
// CHECK: %[[a:.*]] = load i128* bitcast (%struct.s41* @g41 to i128*), align 1
// CHECK: %[[b:.*]] = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 1
// CHECK: call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i128 %[[a]], i128 %[[b]])
  return f41_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g41, g41_2);
}

// structs with size > 16 bytes, without alignment attribute
struct s42
{
  int i;
  short s;
  int i2;
  short s2;
  int i3;
  short s3;
};
typedef struct s42 s42_no_align;
// passing structs in registers
__attribute__ ((noinline))
int f42(int i, s42_no_align s1, s42_no_align s2) {
// CHECK: define i32 @f42(i32 %i, %struct.s42* %s1, %struct.s42* %s2)
// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s42_no_align g42;
s42_no_align g42_2;
int caller42() {
// CHECK: define i32 @caller42()
// CHECK: %[[a:.*]] = alloca %struct.s42, align 4
// CHECK: %[[b:.*]] = alloca %struct.s42, align 4
// CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f42(i32 3, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42(3, g42, g42_2);
}
// passing structs on stack
__attribute__ ((noinline))
int f42_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s42_no_align s1, s42_no_align s2) {
// CHECK: define i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* %s1, %struct.s42* %s2)
// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s42* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s42* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller42_stack() {
// CHECK: define i32 @caller42_stack()
// CHECK: %[[a:.*]] = alloca %struct.s42, align 4
// CHECK: %[[b:.*]] = alloca %struct.s42, align 4
// CHECK: %[[c:.*]] = bitcast %struct.s42* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s42* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %[[a]], %struct.s42* %[[b]])
  return f42_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g42, g42_2);
}

// structs with size > 16 bytes, with alignment attribute
struct s43
{
  int i;
  short s;
  int i2;
  short s2;
  int i3;
  short s3;
} __attribute__((aligned(16)));
typedef struct s43 s43_with_align;
// passing aligned structs in registers
__attribute__ ((noinline))
int f43(int i, s43_with_align s1, s43_with_align s2) {
// CHECK: define i32 @f43(i32 %i, %struct.s43* %s1, %struct.s43* %s2)
// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1
  return s1.i + s2.i + i + s1.s + s2.s;
}
s43_with_align g43;
s43_with_align g43_2;
int caller43() {
// CHECK: define i32 @caller43()
// CHECK: %[[a:.*]] = alloca %struct.s43, align 16
// CHECK: %[[b:.*]] = alloca %struct.s43, align 16
// CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f43(i32 3, %struct.s43* %[[a]], %struct.s43* %[[b]])
  return f43(3, g43, g43_2);
}
// passing aligned structs on stack
__attribute__ ((noinline))
int f43_stack(int i, int i2, int i3, int i4, int i5, int i6, int i7, int i8,
              int i9, s43_with_align s1, s43_with_align s2) {
// CHECK: define i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s43* %s1, %struct.s43* %s2)
// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 0
// CHECK: getelementptr inbounds %struct.s43* %s1, i32 0, i32 1
// CHECK: getelementptr inbounds %struct.s43* %s2, i32 0, i32 1
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + s1.s + s2.s;
}
int caller43_stack() {
// CHECK: define i32 @caller43_stack()
// CHECK: %[[a:.*]] = alloca %struct.s43, align 16
// CHECK: %[[b:.*]] = alloca %struct.s43, align 16
// CHECK: %[[c:.*]] = bitcast %struct.s43* %[[a]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: %[[d:.*]] = bitcast %struct.s43* %[[b]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
// CHECK: call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %[[a]], %struct.s43* %[[b]])
  return f43_stack(1, 2, 3, 4, 5, 6, 7, 8, 9, g43, g43_2);
}

// rdar://13668927
// We should not split argument s1 between registers and stack.
__attribute__ ((noinline))
int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
              s40_no_align s1, s40_no_align s2) {
// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
}
int caller40_split() {
// CHECK: define i32 @caller40_split()
// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}}, [2 x i64] %{{.*}})
  return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
}

__attribute__ ((noinline))
int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
              s41_with_align s1, s41_with_align s2) {
// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce)
  return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
}
int caller41_split() {
// CHECK: define i32 @caller41_split()
// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}})
  return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
}

// Handle homogeneous aggregates properly in variadic functions.
struct HFA {
  float a, b, c, d;
};

float test_hfa(int n, ...) {
// CHECK-LABEL: define float @test_hfa(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]

  // HFA is not indirect, so occupies its full 16 bytes on the stack.
// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 16
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: bitcast i8* [[CURLIST]] to %struct.HFA*
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct HFA h = __builtin_va_arg(thelist, struct HFA);
  return h.d;
}

float test_hfa_call(struct HFA *a) {
// CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
// CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}})
  return test_hfa(1, *a);
}

struct TooBigHFA {
  float a, b, c, d, e;
};

float test_toobig_hfa(int n, ...) {
// CHECK-LABEL: define float @test_toobig_hfa(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]

  // TooBigHFA is not actually an HFA, so gets passed indirectly. Only 8 bytes
  // of stack consumed.
// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: [[HFAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
// CHECK: [[HFAPTR:%.*]] = load i8** [[HFAPTRPTR]]
// CHECK: bitcast i8* [[HFAPTR]] to %struct.TooBigHFA*
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct TooBigHFA h = __builtin_va_arg(thelist, struct TooBigHFA);
  return h.d;
}

struct HVA {
  int32x4_t a, b;
};

int32x4_t test_hva(int n, ...) {
// CHECK-LABEL: define <4 x i32> @test_hva(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]

  // HVA is not indirect, so occupies its full 32 bytes on the stack, but it
  // must be properly aligned.
// CHECK: [[ALIGN0:%.*]] = getelementptr i8* [[CURLIST]], i32 15
// CHECK: [[ALIGN1:%.*]] = ptrtoint i8* [[ALIGN0]] to i64
// CHECK: [[ALIGN2:%.*]] = and i64 [[ALIGN1]], -16
// CHECK: [[ALIGNED_LIST:%.*]] = inttoptr i64 [[ALIGN2]] to i8*

// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[ALIGNED_LIST]], i32 32
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: bitcast i8* [[ALIGNED_LIST]] to %struct.HVA*
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct HVA h = __builtin_va_arg(thelist, struct HVA);
  return h.b;
}

struct TooBigHVA {
  int32x4_t a, b, c, d, e;
};

int32x4_t test_toobig_hva(int n, ...) {
// CHECK-LABEL: define <4 x i32> @test_toobig_hva(i32 %n, ...)
// CHECK: [[THELIST:%.*]] = alloca i8*
// CHECK: [[CURLIST:%.*]] = load i8** [[THELIST]]

  // TooBigHVA is not actually an HVA, so gets passed indirectly. Only 8 bytes
  // of stack consumed.
// CHECK: [[NEXTLIST:%.*]] = getelementptr i8* [[CURLIST]], i32 8
// CHECK: store i8* [[NEXTLIST]], i8** [[THELIST]]

// CHECK: [[HVAPTRPTR:%.*]] = bitcast i8* [[CURLIST]] to i8**
// CHECK: [[HVAPTR:%.*]] = load i8** [[HVAPTRPTR]]
// CHECK: bitcast i8* [[HVAPTR]] to %struct.TooBigHVA*
  __builtin_va_list thelist;
  __builtin_va_start(thelist, n);
  struct TooBigHVA h = __builtin_va_arg(thelist, struct TooBigHVA);
  return h.d;
}