1// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv5-unknown-freebsd -std=c11 | FileCheck %s 2 3// Test that we are generating atomicrmw instructions, rather than 4// compare-exchange loops for common atomic ops. This makes a big difference 5// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for 6// the load and then another ll/sc in the loop, expanding to about 30 7// instructions when it should be only 4. It has a smaller, but still 8// noticeable, impact on platforms like x86 and RISC-V, where there are atomic 9// RMW instructions. 10// 11// We currently emit cmpxchg loops for most operations on _Bools, because 12// they're sufficiently rare that it's not worth making sure that the semantics 13// are correct. 14 15typedef int __attribute__((vector_size(16))) vector; 16 17_Atomic(_Bool) b; 18_Atomic(int) i; 19_Atomic(long long) l; 20_Atomic(short) s; 21_Atomic(char*) p; 22_Atomic(float) f; 23_Atomic(vector) v; 24 25// CHECK: testinc 26void testinc(void) 27{ 28 // Special case for suffix bool++, sets to true and returns the old value. 29 // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst 30 b++; 31 // CHECK: atomicrmw add i32* @i, i32 1 seq_cst 32 i++; 33 // CHECK: atomicrmw add i64* @l, i64 1 seq_cst 34 l++; 35 // CHECK: atomicrmw add i16* @s, i16 1 seq_cst 36 s++; 37 // Prefix increment 38 // Special case for bool: set to true and return true 39 // CHECK: store atomic i8 1, i8* @b seq_cst, align 1 40 ++b; 41 // Currently, we have no variant of atomicrmw that returns the new value, so 42 // we have to generate an atomic add, which returns the old value, and then a 43 // non-atomic add. 44 // CHECK: atomicrmw add i32* @i, i32 1 seq_cst 45 // CHECK: add i32 46 ++i; 47 // CHECK: atomicrmw add i64* @l, i64 1 seq_cst 48 // CHECK: add i64 49 ++l; 50 // CHECK: atomicrmw add i16* @s, i16 1 seq_cst 51 // CHECK: add i16 52 ++s; 53} 54// CHECK: testdec 55void testdec(void) 56{ 57 // CHECK: cmpxchg i8* @b 58 b--; 59 // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst 60 i--; 61 // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst 62 l--; 63 // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst 64 s--; 65 // CHECK: cmpxchg i8* @b 66 --b; 67 // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst 68 // CHECK: sub i32 69 --i; 70 // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst 71 // CHECK: sub i64 72 --l; 73 // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst 74 // CHECK: sub i16 75 --s; 76} 77// CHECK: testaddeq 78void testaddeq(void) 79{ 80 // CHECK: cmpxchg i8* @b 81 // CHECK: atomicrmw add i32* @i, i32 42 seq_cst 82 // CHECK: atomicrmw add i64* @l, i64 42 seq_cst 83 // CHECK: atomicrmw add i16* @s, i16 42 seq_cst 84 b += 42; 85 i += 42; 86 l += 42; 87 s += 42; 88} 89// CHECK: testsubeq 90void testsubeq(void) 91{ 92 // CHECK: cmpxchg i8* @b 93 // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst 94 // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst 95 // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst 96 b -= 42; 97 i -= 42; 98 l -= 42; 99 s -= 42; 100} 101// CHECK: testxoreq 102void testxoreq(void) 103{ 104 // CHECK: cmpxchg i8* @b 105 // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst 106 // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst 107 // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst 108 b ^= 42; 109 i ^= 42; 110 l ^= 42; 111 s ^= 42; 112} 113// CHECK: testoreq 114void testoreq(void) 115{ 116 // CHECK: cmpxchg i8* @b 117 // CHECK: atomicrmw or i32* @i, i32 42 seq_cst 118 // CHECK: atomicrmw or i64* @l, i64 42 seq_cst 119 // CHECK: atomicrmw or i16* @s, i16 42 seq_cst 120 b |= 42; 121 i |= 42; 122 l |= 42; 123 s |= 42; 124} 125// CHECK: testandeq 126void testandeq(void) 127{ 128 // CHECK: cmpxchg i8* @b 129 // CHECK: atomicrmw and i32* @i, i32 42 seq_cst 130 // CHECK: atomicrmw and i64* @l, i64 42 seq_cst 131 // CHECK: atomicrmw and i16* @s, i16 42 seq_cst 132 b &= 42; 133 i &= 42; 134 l &= 42; 135 s &= 42; 136} 137 138// CHECK-LABEL: define arm_aapcscc void @testFloat(float* 139void testFloat(_Atomic(float) *fp) { 140// CHECK: [[FP:%.*]] = alloca float* 141// CHECK-NEXT: [[X:%.*]] = alloca float 142// CHECK-NEXT: [[F:%.*]] = alloca float 143// CHECK-NEXT: [[TMP0:%.*]] = alloca float 144// CHECK-NEXT: [[TMP1:%.*]] = alloca float 145// CHECK-NEXT: store float* {{%.*}}, float** [[FP]] 146 147// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] 148// CHECK-NEXT: store float 1.000000e+00, float* [[T0]], align 4 149 __c11_atomic_init(fp, 1.0f); 150 151// CHECK-NEXT: store float 2.000000e+00, float* [[X]], align 4 152 _Atomic(float) x = 2.0f; 153 154// CHECK-NEXT: [[T0:%.*]] = load float** [[FP]] 155// CHECK-NEXT: [[T1:%.*]] = bitcast float* [[T0]] to i8* 156// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[TMP0]] to i8* 157// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 4, i8* [[T1]], i8* [[T2]], i32 5) 158// CHECK-NEXT: [[T3:%.*]] = load float* [[TMP0]], align 4 159// CHECK-NEXT: store float [[T3]], float* [[F]] 160 float f = *fp; 161 162// CHECK-NEXT: [[T0:%.*]] = load float* [[F]], align 4 163// CHECK-NEXT: [[T1:%.*]] = load float** [[FP]], align 4 164// CHECK-NEXT: store float [[T0]], float* [[TMP1]], align 4 165// CHECK-NEXT: [[T2:%.*]] = bitcast float* [[T1]] to i8* 166// CHECK-NEXT: [[T3:%.*]] = bitcast float* [[TMP1]] to i8* 167// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 4, i8* [[T2]], i8* [[T3]], i32 5) 168 *fp = f; 169 170// CHECK-NEXT: ret void 171} 172 173// CHECK: define arm_aapcscc void @testComplexFloat([[CF:{ float, float }]]* 174void testComplexFloat(_Atomic(_Complex float) *fp) { 175// CHECK: [[FP:%.*]] = alloca [[CF]]*, align 4 176// CHECK-NEXT: [[X:%.*]] = alloca [[CF]], align 8 177// CHECK-NEXT: [[F:%.*]] = alloca [[CF]], align 4 178// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CF]], align 8 179// CHECK-NEXT: [[TMP1:%.*]] = alloca [[CF]], align 8 180// CHECK-NEXT: store [[CF]]* 181 182// CHECK-NEXT: [[P:%.*]] = load [[CF]]** [[FP]] 183// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 0 184// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[P]], i32 0, i32 1 185// CHECK-NEXT: store float 1.000000e+00, float* [[T0]] 186// CHECK-NEXT: store float 0.000000e+00, float* [[T1]] 187 __c11_atomic_init(fp, 1.0f); 188 189// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 0 190// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[X]], i32 0, i32 1 191// CHECK-NEXT: store float 2.000000e+00, float* [[T0]] 192// CHECK-NEXT: store float 0.000000e+00, float* [[T1]] 193 _Atomic(_Complex float) x = 2.0f; 194 195// CHECK-NEXT: [[T0:%.*]] = load [[CF]]** [[FP]] 196// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[T0]] to i8* 197// CHECK-NEXT: [[T2:%.*]] = bitcast [[CF]]* [[TMP0]] to i8* 198// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 199// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 0 200// CHECK-NEXT: [[R:%.*]] = load float* [[T0]] 201// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP0]], i32 0, i32 1 202// CHECK-NEXT: [[I:%.*]] = load float* [[T0]] 203// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 204// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 205// CHECK-NEXT: store float [[R]], float* [[T0]] 206// CHECK-NEXT: store float [[I]], float* [[T1]] 207 _Complex float f = *fp; 208 209// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 0 210// CHECK-NEXT: [[R:%.*]] = load float* [[T0]] 211// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[F]], i32 0, i32 1 212// CHECK-NEXT: [[I:%.*]] = load float* [[T0]] 213// CHECK-NEXT: [[DEST:%.*]] = load [[CF]]** [[FP]], align 4 214// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 0 215// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[CF]]* [[TMP1]], i32 0, i32 1 216// CHECK-NEXT: store float [[R]], float* [[T0]] 217// CHECK-NEXT: store float [[I]], float* [[T1]] 218// CHECK-NEXT: [[T0:%.*]] = bitcast [[CF]]* [[DEST]] to i8* 219// CHECK-NEXT: [[T1:%.*]] = bitcast [[CF]]* [[TMP1]] to i8* 220// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T0]], i8* [[T1]], i32 5) 221 *fp = f; 222 223// CHECK-NEXT: ret void 224} 225 226typedef struct { short x, y, z, w; } S; 227// CHECK: define arm_aapcscc void @testStruct([[S:.*]]* 228void testStruct(_Atomic(S) *fp) { 229// CHECK: [[FP:%.*]] = alloca [[S]]*, align 4 230// CHECK-NEXT: [[X:%.*]] = alloca [[S]], align 8 231// CHECK-NEXT: [[F:%.*]] = alloca [[S:%.*]], align 2 232// CHECK-NEXT: [[TMP0:%.*]] = alloca [[S]], align 8 233// CHECK-NEXT: store [[S]]* 234 235// CHECK-NEXT: [[P:%.*]] = load [[S]]** [[FP]] 236// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 0 237// CHECK-NEXT: store i16 1, i16* [[T0]], align 2 238// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 1 239// CHECK-NEXT: store i16 2, i16* [[T0]], align 2 240// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 2 241// CHECK-NEXT: store i16 3, i16* [[T0]], align 2 242// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[P]], i32 0, i32 3 243// CHECK-NEXT: store i16 4, i16* [[T0]], align 2 244 __c11_atomic_init(fp, (S){1,2,3,4}); 245 246// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 0 247// CHECK-NEXT: store i16 1, i16* [[T0]], align 2 248// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 1 249// CHECK-NEXT: store i16 2, i16* [[T0]], align 2 250// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 2 251// CHECK-NEXT: store i16 3, i16* [[T0]], align 2 252// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[S]]* [[X]], i32 0, i32 3 253// CHECK-NEXT: store i16 4, i16* [[T0]], align 2 254 _Atomic(S) x = (S){1,2,3,4}; 255 256// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] 257// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[T0]] to i8* 258// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* 259// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 260 S f = *fp; 261 262// CHECK-NEXT: [[T0:%.*]] = load [[S]]** [[FP]] 263// CHECK-NEXT: [[T1:%.*]] = bitcast [[S]]* [[TMP0]] to i8* 264// CHECK-NEXT: [[T2:%.*]] = bitcast [[S]]* [[F]] to i8* 265// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 8, i32 2, i1 false) 266// CHECK-NEXT: [[T3:%.*]] = bitcast [[S]]* [[T0]] to i8* 267// CHECK-NEXT: [[T4:%.*]] = bitcast [[S]]* [[TMP0]] to i8* 268// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T3]], i8* [[T4]], i32 5) 269 *fp = f; 270 271// CHECK-NEXT: ret void 272} 273 274typedef struct { short x, y, z; } PS; 275// CHECK: define arm_aapcscc void @testPromotedStruct([[APS:.*]]* 276void testPromotedStruct(_Atomic(PS) *fp) { 277// CHECK: [[FP:%.*]] = alloca [[APS]]*, align 4 278// CHECK-NEXT: [[X:%.*]] = alloca [[APS]], align 8 279// CHECK-NEXT: [[F:%.*]] = alloca [[PS:%.*]], align 2 280// CHECK-NEXT: [[TMP0:%.*]] = alloca [[APS]], align 8 281// CHECK-NEXT: [[TMP1:%.*]] = alloca [[APS]], align 8 282// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 283// CHECK-NEXT: [[TMP2:%.*]] = alloca %struct.PS, align 2 284// CHECK-NEXT: [[TMP3:%.*]] = alloca [[APS]], align 8 285// CHECK-NEXT: store [[APS]]* 286 287// CHECK-NEXT: [[P:%.*]] = load [[APS]]** [[FP]] 288// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[P]] to i8* 289// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[T0]], i8 0, i64 8, i32 8, i1 false) 290// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[P]], i32 0, i32 0 291// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 292// CHECK-NEXT: store i16 1, i16* [[T1]], align 2 293// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 294// CHECK-NEXT: store i16 2, i16* [[T1]], align 2 295// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 296// CHECK-NEXT: store i16 3, i16* [[T1]], align 2 297 __c11_atomic_init(fp, (PS){1,2,3}); 298 299// CHECK-NEXT: [[T0:%.*]] = bitcast [[APS]]* [[X]] to i8* 300// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T0]], i8 0, i32 8, i32 8, i1 false) 301// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[X]], i32 0, i32 0 302// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 0 303// CHECK-NEXT: store i16 1, i16* [[T1]], align 2 304// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 1 305// CHECK-NEXT: store i16 2, i16* [[T1]], align 2 306// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[PS]]* [[T0]], i32 0, i32 2 307// CHECK-NEXT: store i16 3, i16* [[T1]], align 2 308 _Atomic(PS) x = (PS){1,2,3}; 309 310// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] 311// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8* 312// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP0]] to i8* 313// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 314// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP0]], i32 0, i32 0 315// CHECK-NEXT: [[T1:%.*]] = bitcast [[PS]]* [[F]] to i8* 316// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T0]] to i8* 317// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) 318 PS f = *fp; 319 320// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]] 321// CHECK-NEXT: [[T1:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[TMP1]] to i8* 322// CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[T1]], i8 0, i32 8, i32 8, i1 false) 323// CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds [[APS]]* [[TMP1]], i32 0, i32 0 324// CHECK-NEXT: [[T2:%.*]] = bitcast [[PS]]* [[T1]] to i8* 325// CHECK-NEXT: [[T3:%.*]] = bitcast [[PS]]* [[F]] to i8* 326// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T2]], i8* [[T3]], i32 6, i32 2, i1 false) 327// CHECK-NEXT: [[T4:%.*]] = bitcast [[APS]]* [[T0]] to i8* 328// CHECK-NEXT: [[T5:%.*]] = bitcast [[APS]]* [[TMP1]] to i8* 329// CHECK-NEXT: call arm_aapcscc void @__atomic_store(i32 8, i8* [[T4]], i8* [[T5]], i32 5) 330 *fp = f; 331 332// CHECK-NEXT: [[T0:%.*]] = load [[APS]]** [[FP]], align 4 333// CHECK-NEXT: [[T1:%.*]] = bitcast [[APS]]* [[T0]] to i8* 334// CHECK-NEXT: [[T2:%.*]] = bitcast [[APS]]* [[TMP3]] to i8* 335// CHECK-NEXT: call arm_aapcscc void @__atomic_load(i32 8, i8* [[T1]], i8* [[T2]], i32 5) 336// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds [[APS]]* [[TMP3]], i32 0, i32 0 337// CHECK-NEXT: [[T1:%.*]] = bitcast %struct.PS* [[TMP2]] to i8* 338// CHECK-NEXT: [[T2:%.*]] = bitcast %struct.PS* [[T0]] to i8* 339// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[T1]], i8* [[T2]], i32 6, i32 2, i1 false) 340// CHECK-NEXT: [[T0:%.*]] = getelementptr inbounds %struct.PS* [[TMP2]], i32 0, i32 0 341// CHECK-NEXT: [[T1:%.*]] = load i16* [[T0]], align 2 342// CHECK-NEXT: [[T2:%.*]] = sext i16 [[T1]] to i32 343// CHECK-NEXT: store i32 [[T2]], i32* [[A]], align 4 344 int a = ((PS)*fp).x; 345 346// CHECK-NEXT: ret void 347} 348 349// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]* 350 351// FIXME: none of these look right, but we can leave the "test" here 352// to make sure they at least don't crash. 353void testPromotedStructOps(_Atomic(PS) *p) { 354 PS a = __c11_atomic_load(p, 5); 355 __c11_atomic_store(p, a, 5); 356 PS b = __c11_atomic_exchange(p, a, 5); 357 _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5); 358 v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5); 359} 360