1// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s
2
3// Test that we are generating atomicrmw instructions, rather than
4// compare-exchange loops for common atomic ops.  This makes a big difference
5// on RISC platforms, where the compare-exchange loop becomes an ll/sc pair for
6// the load and then another ll/sc in the loop, expanding to about 30
7// instructions when it should be only 4.  It has a smaller, but still
8// noticeable, impact on platforms like x86 and RISC-V, where there are atomic
9// RMW instructions.
10//
11// We currently emit cmpxchg loops for most operations on _Bools, because
12// they're sufficiently rare that it's not worth making sure that the semantics
13// are correct.
14
// Vector-of-int type declared with vector_size(16); in the code visible here
// it is used only as the element type of the atomic global `v` below.
15typedef int __attribute__((vector_size(16))) vector;
16
// File-scope atomic objects shared by the operator tests in this file.
// `b`, `i`, `l` and `s` are the operands of the increment, decrement and
// compound-assignment tests.  `p`, `f` and `v` are declared but are not
// referenced by any test visible in this file (the locals named `f` and `b`
// in later functions shadow the globals of the same name).
17_Atomic(_Bool) b;
18_Atomic(int) i;
19_Atomic(long long) l;
20_Atomic(short) s;
21_Atomic(char*) p;
22_Atomic(float) f;
23_Atomic(vector) v;
24
25// CHECK: testinc
// Exercises postfix and prefix increment on the atomic globals b, i, l and s.
// Each expectation comment sits directly above the statement it describes:
// postfix forms expect a single atomicrmw (xchg for _Bool, add otherwise),
// prefix forms expect an atomic store for _Bool and an atomicrmw add followed
// by an ordinary add for the integer types.
26void testinc(void)
27{
28  // Special case for suffix bool++, sets to true and returns the old value.
29  // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
30  b++;
31  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
32  i++;
33  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
34  l++;
35  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
36  s++;
37  // Prefix increment
38  // Special case for bool: set to true and return true
39  // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
40  ++b;
41  // Currently, we have no variant of atomicrmw that returns the new value, so
42  // we have to generate an atomic add, which returns the old value, and then a
43  // non-atomic add.
44  // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
45  // CHECK: add i32
46  ++i;
47  // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
48  // CHECK: add i64
49  ++l;
50  // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
51  // CHECK: add i16
52  ++s;
53}
54// CHECK: testdec
// Exercises postfix and prefix decrement on the atomic globals b, i, l and s.
// Unlike increment, both forms on the _Bool `b` are expected to produce a
// cmpxchg rather than an atomicrmw.  The integer types are expected to use
// atomicrmw sub; the prefix forms additionally expect an ordinary sub that
// recomputes the new value from the old one returned by the atomicrmw.
55void testdec(void)
56{
57  // CHECK: cmpxchg i8* @b
58  b--;
59  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
60  i--;
61  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
62  l--;
63  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
64  s--;
  // Prefix decrement
65  // CHECK: cmpxchg i8* @b
66  --b;
67  // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
68  // CHECK: sub i32
69  --i;
70  // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
71  // CHECK: sub i64
72  --l;
73  // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
74  // CHECK: sub i16
75  --s;
76}
77// CHECK: testaddeq
// Exercises `+= 42` on the atomic globals b, i, l and s.  The expectations are
// grouped ahead of the statements and listed in the same order as them:
// a cmpxchg for the _Bool, then atomicrmw add for int, long long and short.
78void testaddeq(void)
79{
80  // CHECK: cmpxchg i8* @b
81  // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
82  // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
83  // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
84  b += 42;
85  i += 42;
86  l += 42;
87  s += 42;
88}
89// CHECK: testsubeq
// Exercises `-= 42` on the atomic globals b, i, l and s.  The expectations are
// grouped ahead of the statements and listed in the same order as them:
// a cmpxchg for the _Bool, then atomicrmw sub for int, long long and short.
90void testsubeq(void)
91{
92  // CHECK: cmpxchg i8* @b
93  // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
94  // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
95  // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
96  b -= 42;
97  i -= 42;
98  l -= 42;
99  s -= 42;
100}
101// CHECK: testxoreq
// Exercises `^= 42` on the atomic globals b, i, l and s.  The expectations are
// grouped ahead of the statements and listed in the same order as them:
// a cmpxchg for the _Bool, then atomicrmw xor for int, long long and short.
102void testxoreq(void)
103{
104  // CHECK: cmpxchg i8* @b
105  // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
106  // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
107  // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
108  b ^= 42;
109  i ^= 42;
110  l ^= 42;
111  s ^= 42;
112}
113// CHECK: testoreq
// Exercises `|= 42` on the atomic globals b, i, l and s.  The expectations are
// grouped ahead of the statements and listed in the same order as them:
// a cmpxchg for the _Bool, then atomicrmw or for int, long long and short.
114void testoreq(void)
115{
116  // CHECK: cmpxchg i8* @b
117  // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
118  // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
119  // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
120  b |= 42;
121  i |= 42;
122  l |= 42;
123  s |= 42;
124}
125// CHECK: testandeq
// Exercises `&= 42` on the atomic globals b, i, l and s.  The expectations are
// grouped ahead of the statements and listed in the same order as them:
// a cmpxchg for the _Bool, then atomicrmw and for int, long long and short.
126void testandeq(void)
127{
128  // CHECK: cmpxchg i8* @b
129  // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
130  // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
131  // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
132  b &= 42;
133  i &= 42;
134  l &= 42;
135  s &= 42;
136}
137
138// CHECK-LABEL: define arm_aapcscc void @testFloat(float*
// Exercises initialization, load and store of _Atomic(float), both through the
// pointer parameter `fp` and as the local `x`.  The expectations pin the whole
// function body line by line: five allocas (the parameter slot, `x`, `f` and
// two temporaries), plain float stores for both initializations, and calls to
// the __atomic_load / __atomic_store library functions with size 4 for the
// read and write of `*fp`.
// NOTE(review): the trailing `i32 5` argument of those calls is presumably the
// sequentially-consistent memory order -- confirm against the __atomic
// library ABI.
// The local `f` declared here shadows the file-scope atomic `f`.
139void testFloat(_Atomic(float) *fp) {
140// CHECK:      [[FP:%.*]] = alloca float*
141// CHECK-NEXT: [[X:%.*]] = alloca float
142// CHECK-NEXT: [[F:%.*]] = alloca float
143// CHECK-NEXT: [[TMP0:%.*]] = alloca float
144// CHECK-NEXT: [[TMP1:%.*]] = alloca float
145// CHECK-NEXT: store float* {{%.*}}, float** [[FP]]
146
// Initialization through the pointer is expected to be an ordinary store.
147// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
148// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4
149  __c11_atomic_init(fp, 1.0f);
150
// Initializing an atomic local is likewise expected to be an ordinary store.
151// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4
152  _Atomic(float) x = 2.0f;
153
// Atomic read: loaded into a temporary via the library call, then copied to f.
154// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]]
155// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8*
156// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8*
157// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5)
158// CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4
159// CHECK-NEXT: store float [[T3]], float* [[F]]
160  float f = *fp;
161
// Atomic write: f is spilled to a temporary that is handed to the library call.
162// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4
163// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4
164// CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4
165// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8*
166// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8*
167// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5)
168  *fp = f;
169
170// CHECK-NEXT: ret void
171}
172
173// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]*
// Exercises initialization, load and store of _Atomic(_Complex float), both
// through the pointer parameter `fp` and as the local `x`.  The expectations
// pin the whole body: the atomic objects and temporaries are allocated with
// 8-byte alignment while the plain `_Complex float f` uses 4, initialization
// from a real constant stores the real part and a zero imaginary part
// separately, and the read/write of `*fp` go through the __atomic_load /
// __atomic_store library functions with size 8.
// The local `f` declared here shadows the file-scope atomic `f`.
174void testComplexFloat(_Atomic(_Complex float) *fp) {
175// CHECK:      [[FP:%.*]] = alloca [[CF]]*, align 4
176// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8
177// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4
178// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8
179// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8
180// CHECK-NEXT: store [[CF]]*
181
// Initialization through the pointer: component-wise ordinary stores.
182// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]]
183// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0
184// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1
185// CHECK-NEXT: store float 1.000000e+00, float* [[T0]]
186// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
187  __c11_atomic_init(fp, 1.0f);
188
// Initialization of the atomic local: same component-wise pattern.
189// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0
190// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1
191// CHECK-NEXT: store float 2.000000e+00, float* [[T0]]
192// CHECK-NEXT: store float 0.000000e+00, float* [[T1]]
193  _Atomic(_Complex float) x = 2.0f;
194
// Atomic read: library call fills a temporary, whose real and imaginary parts
// are then copied into f.
195// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]]
196// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8*
197// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8*
198// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
199// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0
200// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
201// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1
202// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
203// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
204// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
205// CHECK-NEXT: store float [[R]], float* [[T0]]
206// CHECK-NEXT: store float [[I]], float* [[T1]]
207  _Complex float f = *fp;
208
// Atomic write: both parts of f are copied to a temporary that is handed to
// the library call.
209// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0
210// CHECK-NEXT: [[R:%.*]] = load float* [[T0]]
211// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1
212// CHECK-NEXT: [[I:%.*]] = load float* [[T0]]
213// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4
214// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0
215// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1
216// CHECK-NEXT: store float [[R]], float* [[T0]]
217// CHECK-NEXT: store float [[I]], float* [[T1]]
218// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8*
219// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8*
220// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5)
221  *fp = f;
222
223// CHECK-NEXT: ret void
224}
225
// Struct of four shorts.  The expectations below treat its atomic form as the
// same IR type as the plain struct, with atomic operations of size 8.
226typedef struct { short x, y, z, w; } S;
227// CHECK: define arm_aapcscc void @testStruct([[S:.*]]*
// Exercises initialization, load and store of _Atomic(S), both through the
// pointer parameter `fp` and as the local `x`.  The expectations pin the whole
// body: initialization stores each member individually, the atomic read is
// done by __atomic_load directly into the local `f`, and the atomic write
// copies `f` into a temporary with memcpy before calling __atomic_store.
// The atomic objects and the temporary are expected to be 8-byte aligned,
// while the plain `S f` is expected to be 2-byte aligned.
// The local `f` declared here shadows the file-scope atomic `f`.
228void testStruct(_Atomic(S) *fp) {
229// CHECK:      [[FP:%.*]] = alloca [[S]]*, align 4
230// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8
231// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2
232// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8
233// CHECK-NEXT: store [[S]]*
234
// Initialization through the pointer: one i16 store per member.
235// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]]
236// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0
237// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
238// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1
239// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
240// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2
241// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
242// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3
243// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
244  __c11_atomic_init(fp, (S){1,2,3,4});
245
// Initialization of the atomic local: same member-wise pattern.
246// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0
247// CHECK-NEXT: store i16 1, i16* [[T0]], align 2
248// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1
249// CHECK-NEXT: store i16 2, i16* [[T0]], align 2
250// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2
251// CHECK-NEXT: store i16 3, i16* [[T0]], align 2
252// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3
253// CHECK-NEXT: store i16 4, i16* [[T0]], align 2
254  _Atomic(S) x = (S){1,2,3,4};
255
// Atomic read: the library call writes straight into f, with no temporary.
256// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
257// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8*
258// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
259// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
260  S f = *fp;
261
// Atomic write: f is first copied into the 8-byte-aligned temporary.
262// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]]
263// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
264// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8*
265// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false)
266// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8*
267// CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8*
268// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5)
269  *fp = f;
270
271// CHECK-NEXT: ret void
272}
273
// Struct of three shorts.  The expectations below copy it with 6-byte memcpys,
// while its atomic form is operated on with size 8 and is spelled out in one
// place as `{ %struct.PS, [2 x i8] }`, i.e. the struct plus two padding bytes.
274typedef struct { short x, y, z; } PS;
275// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]*
// Exercises initialization, load, store and a cast-then-member-access on
// _Atomic(PS), whose atomic representation is larger than PS itself.  The
// expectations pin the whole body: every place that builds an atomic value
// first zeroes all 8 bytes with memset and then fills in the PS part, and
// every atomic read goes through an 8-byte temporary from which the 6-byte PS
// is copied out.
// The local `f` declared here shadows the file-scope atomic `f`.
276void testPromotedStruct(_Atomic(PS) *fp) {
277// CHECK:      [[FP:%.*]] = alloca [[APS]]*, align 4
278// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8
279// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2
280// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8
281// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8
282// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
283// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2
284// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8
285// CHECK-NEXT: store [[APS]]*
286
// Initialization through the pointer: zero the padded object, then store the
// three members.
// NOTE(review): this memset expectation names the i64-length intrinsic while
// the other two memsets in this function name the i32-length one -- confirm
// that this matches the emitted IR and is not a typo.
287// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]]
288// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8*
289// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false)
290// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0
291// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
292// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
293// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
294// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
295// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
296// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
297  __c11_atomic_init(fp, (PS){1,2,3});
298
// Initialization of the atomic local: same zero-then-fill pattern.
299// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8*
300// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false)
301// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0
302// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0
303// CHECK-NEXT: store i16 1, i16* [[T1]], align 2
304// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1
305// CHECK-NEXT: store i16 2, i16* [[T1]], align 2
306// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2
307// CHECK-NEXT: store i16 3, i16* [[T1]], align 2
308  _Atomic(PS) x = (PS){1,2,3};
309
// Atomic read: 8 bytes are loaded into a temporary, then the 6-byte PS part is
// copied into f.
310// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
311// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
312// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8*
313// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
314// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0
315// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8*
316// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8*
317// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
318  PS f = *fp;
319
// Atomic write: a zeroed 8-byte temporary receives the 6 bytes of f and is
// then handed to the library call.
320// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]]
321// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8*
322// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false)
323// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0
324// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8*
325// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8*
326// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false)
327// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8*
328// CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8*
329// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5)
330  *fp = f;
331
// Cast of an atomic lvalue followed by member access: the value is loaded
// atomically into one temporary, copied as a plain PS into a second one, and
// member x is then read from that copy and sign-extended to int.
332// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4
333// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8*
334// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8*
335// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5)
336// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0
337// CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8*
338// CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8*
339// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false)
340// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0
341// CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2
342// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32
343// CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4
344  int a = ((PS)*fp).x;
345
346// CHECK-NEXT: ret void
347}
348
349// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
350
351// FIXME: none of these look right, but we can leave the "test" here
352// to make sure they at least don't crash.
//
// Runs each of the explicit __c11_atomic_* builtins (load, store, exchange,
// strong and weak compare-exchange) on a pointer to the padded atomic struct.
// No expectations are attached to the body, so nothing about the generated
// code is verified here.  The literal 5 is passed as every memory-order
// argument.  The locals `a` and `b` are plain PS values; `b` shadows the
// file-scope atomic `b`.
353void testPromotedStructOps(_Atomic(PS) *p) {
354  PS a = __c11_atomic_load(p, 5);
355  __c11_atomic_store(p, a, 5);
356  PS b = __c11_atomic_exchange(p, a, 5);
  // `b` serves as the expected value for both compare-exchange forms; `v`
  // receives their success result.
357  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
358  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
359}
360