1// REQUIRES: aarch64-registered-target 2// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 3// RUN: -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \ 4// RUN: | FileCheck %s 5 6// Test new aarch64 intrinsics with poly128 7// FIXME: Currently, poly128_t equals to uint128, which will be spilt into 8// two 64-bit GPR(eg X0, X1). Now moving data from X0, X1 to FPR128 will 9// introduce 2 store and 1 load instructions(store X0, X1 to memory and 10// then load back to Q0). If target has NEON, this is better replaced by 11// FMOV or INS. 12 13#include <arm_neon.h> 14 15// CHECK-LABEL: define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 { 16// CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8* 17// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128* 18// CHECK: store i128 %val, i128* [[TMP1]] 19// CHECK: ret void 20void test_vstrq_p128(poly128_t * ptr, poly128_t val) { 21 vstrq_p128(ptr, val); 22 23} 24 25// CHECK-LABEL: define i128 @test_vldrq_p128(i128* %ptr) #0 { 26// CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8* 27// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128* 28// CHECK: [[TMP2:%.*]] = load i128, i128* [[TMP1]] 29// CHECK: ret i128 [[TMP2]] 30poly128_t test_vldrq_p128(poly128_t * ptr) { 31 return vldrq_p128(ptr); 32 33} 34 35// CHECK-LABEL: define void @test_ld_st_p128(i128* %ptr) #0 { 36// CHECK: [[TMP0:%.*]] = bitcast i128* %ptr to i8* 37// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i128* 38// CHECK: [[TMP2:%.*]] = load i128, i128* [[TMP1]] 39// CHECK: [[ADD_PTR:%.*]] = getelementptr inbounds i128, i128* %ptr, i64 1 40// CHECK: [[TMP3:%.*]] = bitcast i128* [[ADD_PTR]] to i8* 41// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i128* 42// CHECK: store i128 [[TMP2]], i128* [[TMP4]] 43// CHECK: ret void 44void test_ld_st_p128(poly128_t * ptr) { 45 vstrq_p128(ptr+1, vldrq_p128(ptr)); 46 47} 48 49// CHECK-LABEL: define i128 @test_vmull_p64(i64 %a, i64 %b) #0 { 50// CHECK: [[VMULL_P64_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) #2 51// CHECK: [[VMULL_P641_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I]] to i128 52// CHECK: ret i128 [[VMULL_P641_I]] 53poly128_t test_vmull_p64(poly64_t a, poly64_t b) { 54 return vmull_p64(a, b); 55} 56 57// CHECK-LABEL: define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #0 { 58// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1> 59// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to i64 60// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <2 x i64> %b, <2 x i64> %b, <1 x i32> <i32 1> 61// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I7_I]] to i64 62// CHECK: [[VMULL_P64_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[TMP0]], i64 [[TMP1]]) #2 63// CHECK: [[VMULL_P641_I_I:%.*]] = bitcast <16 x i8> [[VMULL_P64_I_I]] to i128 64// CHECK: ret i128 [[VMULL_P641_I_I]] 65poly128_t test_vmull_high_p64(poly64x2_t a, poly64x2_t b) { 66 return vmull_high_p64(a, b); 67} 68 69// CHECK-LABEL: define i128 @test_vreinterpretq_p128_s8(<16 x i8> %a) #0 { 70// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128 71// CHECK: ret i128 [[TMP0]] 72poly128_t test_vreinterpretq_p128_s8(int8x16_t a) { 73 return vreinterpretq_p128_s8(a); 74} 75 76// CHECK-LABEL: define i128 @test_vreinterpretq_p128_s16(<8 x i16> %a) #0 { 77// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128 78// CHECK: ret i128 [[TMP0]] 79poly128_t test_vreinterpretq_p128_s16(int16x8_t a) { 80 return vreinterpretq_p128_s16(a); 81} 82 83// CHECK-LABEL: define i128 @test_vreinterpretq_p128_s32(<4 x i32> %a) #0 { 84// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to i128 85// CHECK: ret i128 [[TMP0]] 86poly128_t test_vreinterpretq_p128_s32(int32x4_t a) { 87 return vreinterpretq_p128_s32(a); 88} 89 90// CHECK-LABEL: define i128 @test_vreinterpretq_p128_s64(<2 x i64> %a) #0 { 91// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128 92// CHECK: ret i128 [[TMP0]] 93poly128_t test_vreinterpretq_p128_s64(int64x2_t a) { 94 return vreinterpretq_p128_s64(a); 95} 96 97// CHECK-LABEL: define i128 @test_vreinterpretq_p128_u8(<16 x i8> %a) #0 { 98// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128 99// CHECK: ret i128 [[TMP0]] 100poly128_t test_vreinterpretq_p128_u8(uint8x16_t a) { 101 return vreinterpretq_p128_u8(a); 102} 103 104// CHECK-LABEL: define i128 @test_vreinterpretq_p128_u16(<8 x i16> %a) #0 { 105// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128 106// CHECK: ret i128 [[TMP0]] 107poly128_t test_vreinterpretq_p128_u16(uint16x8_t a) { 108 return vreinterpretq_p128_u16(a); 109} 110 111// CHECK-LABEL: define i128 @test_vreinterpretq_p128_u32(<4 x i32> %a) #0 { 112// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to i128 113// CHECK: ret i128 [[TMP0]] 114poly128_t test_vreinterpretq_p128_u32(uint32x4_t a) { 115 return vreinterpretq_p128_u32(a); 116} 117 118// CHECK-LABEL: define i128 @test_vreinterpretq_p128_u64(<2 x i64> %a) #0 { 119// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128 120// CHECK: ret i128 [[TMP0]] 121poly128_t test_vreinterpretq_p128_u64(uint64x2_t a) { 122 return vreinterpretq_p128_u64(a); 123} 124 125// CHECK-LABEL: define i128 @test_vreinterpretq_p128_f32(<4 x float> %a) #0 { 126// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to i128 127// CHECK: ret i128 [[TMP0]] 128poly128_t test_vreinterpretq_p128_f32(float32x4_t a) { 129 return vreinterpretq_p128_f32(a); 130} 131 132// CHECK-LABEL: define i128 @test_vreinterpretq_p128_f64(<2 x double> %a) #0 { 133// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to i128 134// CHECK: ret i128 [[TMP0]] 135poly128_t test_vreinterpretq_p128_f64(float64x2_t a) { 136 return vreinterpretq_p128_f64(a); 137} 138 139// CHECK-LABEL: define i128 @test_vreinterpretq_p128_p8(<16 x i8> %a) #0 { 140// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to i128 141// CHECK: ret i128 [[TMP0]] 142poly128_t test_vreinterpretq_p128_p8(poly8x16_t a) { 143 return vreinterpretq_p128_p8(a); 144} 145 146// CHECK-LABEL: define i128 @test_vreinterpretq_p128_p16(<8 x i16> %a) #0 { 147// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to i128 148// CHECK: ret i128 [[TMP0]] 149poly128_t test_vreinterpretq_p128_p16(poly16x8_t a) { 150 return vreinterpretq_p128_p16(a); 151} 152 153// CHECK-LABEL: define i128 @test_vreinterpretq_p128_p64(<2 x i64> %a) #0 { 154// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to i128 155// CHECK: ret i128 [[TMP0]] 156poly128_t test_vreinterpretq_p128_p64(poly64x2_t a) { 157 return vreinterpretq_p128_p64(a); 158} 159 160// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p128(i128 %a) #0 { 161// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8> 162// CHECK: ret <16 x i8> [[TMP0]] 163int8x16_t test_vreinterpretq_s8_p128(poly128_t a) { 164 return vreinterpretq_s8_p128(a); 165} 166 167// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p128(i128 %a) #0 { 168// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16> 169// CHECK: ret <8 x i16> [[TMP0]] 170int16x8_t test_vreinterpretq_s16_p128(poly128_t a) { 171 return vreinterpretq_s16_p128(a); 172} 173 174// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p128(i128 %a) #0 { 175// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x i32> 176// CHECK: ret <4 x i32> [[TMP0]] 177int32x4_t test_vreinterpretq_s32_p128(poly128_t a) { 178 return vreinterpretq_s32_p128(a); 179} 180 181// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p128(i128 %a) #0 { 182// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64> 183// CHECK: ret <2 x i64> [[TMP0]] 184int64x2_t test_vreinterpretq_s64_p128(poly128_t a) { 185 return vreinterpretq_s64_p128(a); 186} 187 188// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p128(i128 %a) #0 { 189// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8> 190// CHECK: ret <16 x i8> [[TMP0]] 191uint8x16_t test_vreinterpretq_u8_p128(poly128_t a) { 192 return vreinterpretq_u8_p128(a); 193} 194 195// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p128(i128 %a) #0 { 196// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16> 197// CHECK: ret <8 x i16> [[TMP0]] 198uint16x8_t test_vreinterpretq_u16_p128(poly128_t a) { 199 return vreinterpretq_u16_p128(a); 200} 201 202// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p128(i128 %a) #0 { 203// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x i32> 204// CHECK: ret <4 x i32> [[TMP0]] 205uint32x4_t test_vreinterpretq_u32_p128(poly128_t a) { 206 return vreinterpretq_u32_p128(a); 207} 208 209// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p128(i128 %a) #0 { 210// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64> 211// CHECK: ret <2 x i64> [[TMP0]] 212uint64x2_t test_vreinterpretq_u64_p128(poly128_t a) { 213 return vreinterpretq_u64_p128(a); 214} 215 216// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p128(i128 %a) #0 { 217// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <4 x float> 218// CHECK: ret <4 x float> [[TMP0]] 219float32x4_t test_vreinterpretq_f32_p128(poly128_t a) { 220 return vreinterpretq_f32_p128(a); 221} 222 223// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p128(i128 %a) #0 { 224// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x double> 225// CHECK: ret <2 x double> [[TMP0]] 226float64x2_t test_vreinterpretq_f64_p128(poly128_t a) { 227 return vreinterpretq_f64_p128(a); 228} 229 230// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p128(i128 %a) #0 { 231// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <16 x i8> 232// CHECK: ret <16 x i8> [[TMP0]] 233poly8x16_t test_vreinterpretq_p8_p128(poly128_t a) { 234 return vreinterpretq_p8_p128(a); 235} 236 237// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p128(i128 %a) #0 { 238// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <8 x i16> 239// CHECK: ret <8 x i16> [[TMP0]] 240poly16x8_t test_vreinterpretq_p16_p128(poly128_t a) { 241 return vreinterpretq_p16_p128(a); 242} 243 244// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p128(i128 %a) #0 { 245// CHECK: [[TMP0:%.*]] = bitcast i128 %a to <2 x i64> 246// CHECK: ret <2 x i64> [[TMP0]] 247poly64x2_t test_vreinterpretq_p64_p128(poly128_t a) { 248 return vreinterpretq_p64_p128(a); 249} 250 251 252