1// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 2// RUN: -fallow-half-arguments-and-returns -ffp-contract=fast -S -emit-llvm -o - %s \ 3// RUN: | opt -S -mem2reg \ 4// RUN: | FileCheck %s 5 6// Test new aarch64 intrinsics and types 7 8#include <arm_neon.h> 9 10// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 11// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 12// CHECK: ret <8 x i8> [[ADD_I]] 13int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) { 14 return vadd_s8(v1, v2); 15} 16 17// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 18// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 19// CHECK: ret <4 x i16> [[ADD_I]] 20int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { 21 return vadd_s16(v1, v2); 22} 23 24// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 25// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 26// CHECK: ret <2 x i32> [[ADD_I]] 27int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { 28 return vadd_s32(v1, v2); 29} 30 31// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 { 32// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 33// CHECK: ret <1 x i64> [[ADD_I]] 34int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { 35 return vadd_s64(v1, v2); 36} 37 38// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 { 39// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2 40// CHECK: ret <2 x float> [[ADD_I]] 41float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { 42 return vadd_f32(v1, v2); 43} 44 45// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 46// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 47// CHECK: ret <8 x i8> [[ADD_I]] 48uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { 49 return vadd_u8(v1, v2); 50} 51 52// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 53// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 
54// CHECK: ret <4 x i16> [[ADD_I]] 55uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { 56 return vadd_u16(v1, v2); 57} 58 59// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 60// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 61// CHECK: ret <2 x i32> [[ADD_I]] 62uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { 63 return vadd_u32(v1, v2); 64} 65 66// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 { 67// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 68// CHECK: ret <1 x i64> [[ADD_I]] 69uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { 70 return vadd_u64(v1, v2); 71} 72 73// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 74// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 75// CHECK: ret <16 x i8> [[ADD_I]] 76int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { 77 return vaddq_s8(v1, v2); 78} 79 80// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 81// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 82// CHECK: ret <8 x i16> [[ADD_I]] 83int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { 84 return vaddq_s16(v1, v2); 85} 86 87// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 88// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 89// CHECK: ret <4 x i32> [[ADD_I]] 90int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t v2) { 91 return vaddq_s32(v1, v2); 92} 93 94// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 95// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 96// CHECK: ret <2 x i64> [[ADD_I]] 97int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { 98 return vaddq_s64(v1, v2); 99} 100 101// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 102// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2 103// CHECK: ret <4 x float> [[ADD_I]] 104float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { 105 return vaddq_f32(v1, 
v2); 106} 107 108// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 109// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2 110// CHECK: ret <2 x double> [[ADD_I]] 111float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) { 112 return vaddq_f64(v1, v2); 113} 114 115// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 116// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 117// CHECK: ret <16 x i8> [[ADD_I]] 118uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { 119 return vaddq_u8(v1, v2); 120} 121 122// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 123// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 124// CHECK: ret <8 x i16> [[ADD_I]] 125uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { 126 return vaddq_u16(v1, v2); 127} 128 129// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 130// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 131// CHECK: ret <4 x i32> [[ADD_I]] 132uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { 133 return vaddq_u32(v1, v2); 134} 135 136// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 137// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 138// CHECK: ret <2 x i64> [[ADD_I]] 139uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { 140 return vaddq_u64(v1, v2); 141} 142 143// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 144// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 145// CHECK: ret <8 x i8> [[SUB_I]] 146int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { 147 return vsub_s8(v1, v2); 148} 149// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 150// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 151// CHECK: ret <4 x i16> [[SUB_I]] 152int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { 153 return vsub_s16(v1, v2); 154} 155// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 
x i32> %v2) #0 { 156// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 157// CHECK: ret <2 x i32> [[SUB_I]] 158int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { 159 return vsub_s32(v1, v2); 160} 161 162// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 { 163// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 164// CHECK: ret <1 x i64> [[SUB_I]] 165int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { 166 return vsub_s64(v1, v2); 167} 168 169// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 { 170// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2 171// CHECK: ret <2 x float> [[SUB_I]] 172float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { 173 return vsub_f32(v1, v2); 174} 175 176// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 177// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 178// CHECK: ret <8 x i8> [[SUB_I]] 179uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { 180 return vsub_u8(v1, v2); 181} 182 183// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 184// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 185// CHECK: ret <4 x i16> [[SUB_I]] 186uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) { 187 return vsub_u16(v1, v2); 188} 189 190// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 191// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 192// CHECK: ret <2 x i32> [[SUB_I]] 193uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) { 194 return vsub_u32(v1, v2); 195} 196 197// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 { 198// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 199// CHECK: ret <1 x i64> [[SUB_I]] 200uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) { 201 return vsub_u64(v1, v2); 202} 203 204// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 205// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 206// CHECK: ret <16 x 
i8> [[SUB_I]] 207int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) { 208 return vsubq_s8(v1, v2); 209} 210 211// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 212// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 213// CHECK: ret <8 x i16> [[SUB_I]] 214int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) { 215 return vsubq_s16(v1, v2); 216} 217 218// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 219// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 220// CHECK: ret <4 x i32> [[SUB_I]] 221int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t v2) { 222 return vsubq_s32(v1, v2); 223} 224 225// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 226// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 227// CHECK: ret <2 x i64> [[SUB_I]] 228int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) { 229 return vsubq_s64(v1, v2); 230} 231 232// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 233// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2 234// CHECK: ret <4 x float> [[SUB_I]] 235float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) { 236 return vsubq_f32(v1, v2); 237} 238 239// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 240// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2 241// CHECK: ret <2 x double> [[SUB_I]] 242float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) { 243 return vsubq_f64(v1, v2); 244} 245 246// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 247// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 248// CHECK: ret <16 x i8> [[SUB_I]] 249uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) { 250 return vsubq_u8(v1, v2); 251} 252 253// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 254// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 255// CHECK: ret <8 x i16> [[SUB_I]] 256uint16x8_t 
test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) { 257 return vsubq_u16(v1, v2); 258} 259 260// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 261// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 262// CHECK: ret <4 x i32> [[SUB_I]] 263uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) { 264 return vsubq_u32(v1, v2); 265} 266 267// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 268// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 269// CHECK: ret <2 x i64> [[SUB_I]] 270uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) { 271 return vsubq_u64(v1, v2); 272} 273 274// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 275// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 276// CHECK: ret <8 x i8> [[MUL_I]] 277int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) { 278 return vmul_s8(v1, v2); 279} 280 281// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 282// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 283// CHECK: ret <4 x i16> [[MUL_I]] 284int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) { 285 return vmul_s16(v1, v2); 286} 287 288// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 289// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 290// CHECK: ret <2 x i32> [[MUL_I]] 291int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) { 292 return vmul_s32(v1, v2); 293} 294 295// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 { 296// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2 297// CHECK: ret <2 x float> [[MUL_I]] 298float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { 299 return vmul_f32(v1, v2); 300} 301 302 303// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 304// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 305// CHECK: ret <8 x i8> [[MUL_I]] 306uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) { 307 return vmul_u8(v1, v2); 308} 309 310// 
CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 311// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 312// CHECK: ret <4 x i16> [[MUL_I]] 313uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) { 314 return vmul_u16(v1, v2); 315} 316 317// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 318// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 319// CHECK: ret <2 x i32> [[MUL_I]] 320uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) { 321 return vmul_u32(v1, v2); 322} 323 324// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 325// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 326// CHECK: ret <16 x i8> [[MUL_I]] 327int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) { 328 return vmulq_s8(v1, v2); 329} 330 331// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 332// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 333// CHECK: ret <8 x i16> [[MUL_I]] 334int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) { 335 return vmulq_s16(v1, v2); 336} 337 338// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 339// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 340// CHECK: ret <4 x i32> [[MUL_I]] 341int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) { 342 return vmulq_s32(v1, v2); 343} 344 345// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 346// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 347// CHECK: ret <16 x i8> [[MUL_I]] 348uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) { 349 return vmulq_u8(v1, v2); 350} 351 352// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 353// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 354// CHECK: ret <8 x i16> [[MUL_I]] 355uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) { 356 return vmulq_u16(v1, v2); 357} 358 359// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 360// 
CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 361// CHECK: ret <4 x i32> [[MUL_I]] 362uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) { 363 return vmulq_u32(v1, v2); 364} 365 366// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 367// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2 368// CHECK: ret <4 x float> [[MUL_I]] 369float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) { 370 return vmulq_f32(v1, v2); 371} 372 373// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 374// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2 375// CHECK: ret <2 x double> [[MUL_I]] 376float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { 377 return vmulq_f64(v1, v2); 378} 379 380// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 { 381// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 382// CHECK: ret <8 x i8> [[VMUL_V_I]] 383poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { 384 // test_vmul_p8 385 return vmul_p8(v1, v2); 386 // pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 387} 388 389// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 { 390// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 391// CHECK: ret <16 x i8> [[VMULQ_V_I]] 392poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { 393 // test_vmulq_p8 394 return vmulq_p8(v1, v2); 395 // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 396} 397 398 399// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 400// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 401// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] 402// CHECK: ret <8 x i8> [[ADD_I]] 403int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { 404 return vmla_s8(v1, v2, v3); 405} 406 407// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> 
%v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 408// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 409// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] 410// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8> 411// CHECK: ret <8 x i8> [[TMP0]] 412int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { 413 return vmla_s16(v1, v2, v3); 414} 415 416// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 { 417// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 418// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] 419// CHECK: ret <2 x i32> [[ADD_I]] 420int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { 421 return vmla_s32(v1, v2, v3); 422} 423 424// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 { 425// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 426// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]] 427// CHECK: ret <2 x float> [[ADD_I]] 428float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 429 return vmla_f32(v1, v2, v3); 430} 431 432// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 433// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 434// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] 435// CHECK: ret <8 x i8> [[ADD_I]] 436uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 437 return vmla_u8(v1, v2, v3); 438} 439 440// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 441// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 442// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] 443// CHECK: ret <4 x i16> [[ADD_I]] 444uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 445 return vmla_u16(v1, v2, v3); 446} 447 448// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 { 449// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 450// CHECK: 
[[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] 451// CHECK: ret <2 x i32> [[ADD_I]] 452uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 453 return vmla_u32(v1, v2, v3); 454} 455 456// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 457// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 458// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] 459// CHECK: ret <16 x i8> [[ADD_I]] 460int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { 461 return vmlaq_s8(v1, v2, v3); 462} 463 464// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 465// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 466// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] 467// CHECK: ret <8 x i16> [[ADD_I]] 468int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { 469 return vmlaq_s16(v1, v2, v3); 470} 471 472// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 473// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 474// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] 475// CHECK: ret <4 x i32> [[ADD_I]] 476int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { 477 return vmlaq_s32(v1, v2, v3); 478} 479 480// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 { 481// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 482// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]] 483// CHECK: ret <4 x float> [[ADD_I]] 484float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 485 return vmlaq_f32(v1, v2, v3); 486} 487 488// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 489// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 490// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] 491// CHECK: ret <16 x i8> [[ADD_I]] 492uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 
493 return vmlaq_u8(v1, v2, v3); 494} 495 496// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 497// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 498// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] 499// CHECK: ret <8 x i16> [[ADD_I]] 500uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 501 return vmlaq_u16(v1, v2, v3); 502} 503 504// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 505// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 506// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] 507// CHECK: ret <4 x i32> [[ADD_I]] 508uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { 509 return vmlaq_u32(v1, v2, v3); 510} 511 512// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 { 513// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 514// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]] 515// CHECK: ret <2 x double> [[ADD_I]] 516float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 517 return vmlaq_f64(v1, v2, v3); 518} 519 520// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 521// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 522// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] 523// CHECK: ret <8 x i8> [[SUB_I]] 524int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { 525 return vmls_s8(v1, v2, v3); 526} 527 528// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 529// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 530// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] 531// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8> 532// CHECK: ret <8 x i8> [[TMP0]] 533int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { 534 return vmls_s16(v1, v2, v3); 535} 536 537// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 
x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 { 538// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 539// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] 540// CHECK: ret <2 x i32> [[SUB_I]] 541int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { 542 return vmls_s32(v1, v2, v3); 543} 544 545// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 { 546// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 547// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]] 548// CHECK: ret <2 x float> [[SUB_I]] 549float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 550 return vmls_f32(v1, v2, v3); 551} 552 553// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 554// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 555// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] 556// CHECK: ret <8 x i8> [[SUB_I]] 557uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 558 return vmls_u8(v1, v2, v3); 559} 560 561// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 562// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 563// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] 564// CHECK: ret <4 x i16> [[SUB_I]] 565uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 566 return vmls_u16(v1, v2, v3); 567} 568 569// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 { 570// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 571// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] 572// CHECK: ret <2 x i32> [[SUB_I]] 573uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 574 return vmls_u32(v1, v2, v3); 575} 576// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 577// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 578// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] 579// CHECK: ret 
<16 x i8> [[SUB_I]] 580int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { 581 return vmlsq_s8(v1, v2, v3); 582} 583 584// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 585// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 586// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] 587// CHECK: ret <8 x i16> [[SUB_I]] 588int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { 589 return vmlsq_s16(v1, v2, v3); 590} 591 592// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 593// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 594// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] 595// CHECK: ret <4 x i32> [[SUB_I]] 596int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { 597 return vmlsq_s32(v1, v2, v3); 598} 599 600// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 { 601// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 602// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]] 603// CHECK: ret <4 x float> [[SUB_I]] 604float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 605 return vmlsq_f32(v1, v2, v3); 606} 607// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 608// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 609// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] 610// CHECK: ret <16 x i8> [[SUB_I]] 611uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 612 return vmlsq_u8(v1, v2, v3); 613} 614 615// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 616// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 617// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] 618// CHECK: ret <8 x i16> [[SUB_I]] 619uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 620 return vmlsq_u16(v1, v2, v3); 621} 622 623// CHECK-LABEL: 
define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 624// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 625// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] 626// CHECK: ret <4 x i32> [[SUB_I]] 627uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { 628 return vmlsq_u32(v1, v2, v3); 629} 630 631// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 { 632// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 633// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]] 634// CHECK: ret <2 x double> [[SUB_I]] 635float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 636 return vmlsq_f64(v1, v2, v3); 637} 638// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 { 639// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 640// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 641// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> 642// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 643// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 644// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 645// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 646// CHECK: ret <2 x float> [[TMP6]] 647float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 648 return vfma_f32(v1, v2, v3); 649} 650 651// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 { 652// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 653// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 654// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> 655// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 656// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 657// CHECK: 
[[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 658// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 659// CHECK: ret <4 x float> [[TMP6]] 660float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 661 return vfmaq_f32(v1, v2, v3); 662} 663 664// CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 { 665// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 666// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 667// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> 668// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 669// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 670// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 671// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4 672// CHECK: ret <2 x double> [[TMP6]] 673float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 674 return vfmaq_f64(v1, v2, v3); 675} 676// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 { 677// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2 678// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 679// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> 680// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> 681// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 682// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 683// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 684// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 685// CHECK: ret <2 x float> [[TMP6]] 686float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t 
v2, float32x2_t v3) { 687 return vfms_f32(v1, v2, v3); 688} 689 690// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 { 691// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2 692// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 693// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> 694// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> 695// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 696// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 697// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 698// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 699// CHECK: ret <4 x float> [[TMP6]] 700float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 701 return vfmsq_f32(v1, v2, v3); 702} 703 704// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 { 705// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2 706// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 707// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> 708// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> 709// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 710// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 711// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 712// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4 713// CHECK: ret <2 x double> [[TMP6]] 714float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 715 return vfmsq_f64(v1, v2, v3); 716} 717 718// CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x 
double> %v1, <2 x double> %v2) #0 { 719// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2 720// CHECK: ret <2 x double> [[DIV_I]] 721float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) { 722 return vdivq_f64(v1, v2); 723} 724 725// CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 726// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2 727// CHECK: ret <4 x float> [[DIV_I]] 728float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) { 729 return vdivq_f32(v1, v2); 730} 731 732// CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 { 733// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2 734// CHECK: ret <2 x float> [[DIV_I]] 735float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { 736 return vdiv_f32(v1, v2); 737} 738 739// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 740// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4 741// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] 742// CHECK: ret <8 x i8> [[ADD_I]] 743int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { 744 return vaba_s8(v1, v2, v3); 745} 746 747// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 748// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 749// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 750// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 751// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 752// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4 753// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] 754// CHECK: ret <4 x i16> [[ADD_I]] 755int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { 756 return vaba_s16(v1, v2, v3); 757} 758 759// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x 
i32> %v2, <2 x i32> %v3) #0 { 760// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 761// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 762// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 763// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 764// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4 765// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] 766// CHECK: ret <2 x i32> [[ADD_I]] 767int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { 768 return vaba_s32(v1, v2, v3); 769} 770 771// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 772// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4 773// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] 774// CHECK: ret <8 x i8> [[ADD_I]] 775uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 776 return vaba_u8(v1, v2, v3); 777} 778 779// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 780// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 781// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 782// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 783// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 784// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4 785// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] 786// CHECK: ret <4 x i16> [[ADD_I]] 787uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 788 return vaba_u16(v1, v2, v3); 789} 790 791// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 { 792// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 793// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 794// CHECK: 
[[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 795// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 796// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4 797// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] 798// CHECK: ret <2 x i32> [[ADD_I]] 799uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 800 return vaba_u32(v1, v2, v3); 801} 802 803// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 804// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4 805// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] 806// CHECK: ret <16 x i8> [[ADD_I]] 807int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { 808 return vabaq_s8(v1, v2, v3); 809} 810 811// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 812// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 813// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 814// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 815// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 816// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4 817// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] 818// CHECK: ret <8 x i16> [[ADD_I]] 819int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { 820 return vabaq_s16(v1, v2, v3); 821} 822 823// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 824// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 825// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 826// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 827// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 828// 
CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4 829// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] 830// CHECK: ret <4 x i32> [[ADD_I]] 831int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { 832 return vabaq_s32(v1, v2, v3); 833} 834 835// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 836// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4 837// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] 838// CHECK: ret <16 x i8> [[ADD_I]] 839uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 840 return vabaq_u8(v1, v2, v3); 841} 842 843// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 844// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 845// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 846// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 847// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 848// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4 849// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] 850// CHECK: ret <8 x i16> [[ADD_I]] 851uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 852 return vabaq_u16(v1, v2, v3); 853} 854 855// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 856// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 857// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 858// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 859// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 860// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4 861// 
CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] 862// CHECK: ret <4 x i32> [[ADD_I]] 863uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { 864 return vabaq_u32(v1, v2, v3); 865} 866 867// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 868// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 869// CHECK: ret <8 x i8> [[VABD_I]] 870int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { 871 return vabd_s8(v1, v2); 872} 873 874// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 875// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 876// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 877// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 878// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 879// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4 880// CHECK: ret <4 x i16> [[VABD2_I]] 881int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { 882 return vabd_s16(v1, v2); 883} 884 885// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 886// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 887// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 888// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 889// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 890// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4 891// CHECK: ret <2 x i32> [[VABD2_I]] 892int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { 893 return vabd_s32(v1, v2); 894} 895 896// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 897// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 898// CHECK: ret <8 x i8> [[VABD_I]] 899uint8x8_t test_vabd_u8(uint8x8_t 
v1, uint8x8_t v2) { 900 return vabd_u8(v1, v2); 901} 902 903// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 904// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 905// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 906// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 907// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 908// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4 909// CHECK: ret <4 x i16> [[VABD2_I]] 910uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { 911 return vabd_u16(v1, v2); 912} 913 914// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 915// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 916// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 917// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 918// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 919// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4 920// CHECK: ret <2 x i32> [[VABD2_I]] 921uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { 922 return vabd_u32(v1, v2); 923} 924 925// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 { 926// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 927// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 928// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 929// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 930// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4 931// CHECK: ret <2 x float> [[VABD2_I]] 932float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { 933 return vabd_f32(v1, v2); 934} 935 936// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 
937// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 938// CHECK: ret <16 x i8> [[VABD_I]] 939int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { 940 return vabdq_s8(v1, v2); 941} 942 943// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 944// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 945// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 946// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 947// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 948// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4 949// CHECK: ret <8 x i16> [[VABD2_I]] 950int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { 951 return vabdq_s16(v1, v2); 952} 953 954// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 955// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 956// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 957// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 958// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 959// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4 960// CHECK: ret <4 x i32> [[VABD2_I]] 961int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { 962 return vabdq_s32(v1, v2); 963} 964 965// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 966// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 967// CHECK: ret <16 x i8> [[VABD_I]] 968uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { 969 return vabdq_u8(v1, v2); 970} 971 972// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 973// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 974// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 
to <16 x i8> 975// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 976// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 977// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4 978// CHECK: ret <8 x i16> [[VABD2_I]] 979uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { 980 return vabdq_u16(v1, v2); 981} 982 983// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 984// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 985// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 986// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 987// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 988// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4 989// CHECK: ret <4 x i32> [[VABD2_I]] 990uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { 991 return vabdq_u32(v1, v2); 992} 993 994// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 995// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 996// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 997// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 998// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 999// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4 1000// CHECK: ret <4 x float> [[VABD2_I]] 1001float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { 1002 return vabdq_f32(v1, v2); 1003} 1004 1005// CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 1006// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1007// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1008// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1009// CHECK: 
[[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1010// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4 1011// CHECK: ret <2 x double> [[VABD2_I]] 1012float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { 1013 return vabdq_f64(v1, v2); 1014} 1015 1016 1017// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 1018// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 1019// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1020// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 1021// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] 1022// CHECK: ret <8 x i8> [[VBSL2_I]] 1023int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { 1024 return vbsl_s8(v1, v2, v3); 1025} 1026 1027// CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 1028// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1029// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1030// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 1031// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1032// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1033// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1034// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]] 1035// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1> 1036// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] 1037// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] 1038// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> 1039// CHECK: ret <8 x i8> [[TMP4]] 1040int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { 1041 return vbsl_s16(v1, v2, v3); 1042} 1043 1044// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> 
%v3) #0 { 1045// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 1046// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 1047// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 1048// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1049// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1050// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1051// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]] 1052// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1> 1053// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]] 1054// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] 1055// CHECK: ret <2 x i32> [[VBSL5_I]] 1056int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { 1057 return vbsl_s32(v1, v2, v3); 1058} 1059 1060// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 { 1061// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> 1062// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> 1063// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> 1064// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 1065// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 1066// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> 1067// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]] 1068// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1> 1069// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] 1070// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 1071// CHECK: ret <1 x i64> [[VBSL5_I]] 1072uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { 1073 return vbsl_s64(v1, v2, v3); 1074} 1075 1076// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 1077// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 1078// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1079// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 1080// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] 1081// CHECK: ret <8 x i8> [[VBSL2_I]] 1082uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 1083 return vbsl_u8(v1, v2, v3); 1084} 1085 1086// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 1087// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1088// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1089// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 1090// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1091// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1092// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1093// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]] 1094// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1> 1095// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] 1096// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] 1097// CHECK: ret <4 x i16> [[VBSL5_I]] 1098uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 1099 return vbsl_u16(v1, v2, v3); 1100} 1101 1102// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 { 1103// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 1104// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 1105// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 1106// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1107// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1108// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1109// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]] 1110// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1> 1111// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]] 1112// 
CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] 1113// CHECK: ret <2 x i32> [[VBSL5_I]] 1114uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 1115 return vbsl_u32(v1, v2, v3); 1116} 1117 1118// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 { 1119// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> 1120// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> 1121// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> 1122// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 1123// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 1124// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> 1125// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]] 1126// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1> 1127// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] 1128// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 1129// CHECK: ret <1 x i64> [[VBSL5_I]] 1130uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { 1131 return vbsl_u64(v1, v2, v3); 1132} 1133 1134// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 { 1135// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32> 1136// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> 1137// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1138// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8> 1139// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1140// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1141// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 1142// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]] 1143// CHECK: [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1> 1144// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] 1145// CHECK: [[VBSL5_I:%.*]] = or <2 x 
i32> [[VBSL3_I]], [[VBSL4_I]] 1146// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> 1147// CHECK: ret <2 x float> [[TMP5]] 1148float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 1149 return vbsl_f32(v1, v2, v3); 1150} 1151 1152// CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 { 1153// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> 1154// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8> 1155// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8> 1156// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 1157// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 1158// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> 1159// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]] 1160// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1> 1161// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] 1162// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 1163// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> 1164// CHECK: ret <1 x double> [[TMP4]] 1165float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { 1166 return vbsl_f64(v1, v2, v3); 1167} 1168 1169// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 { 1170// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 1171// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1172// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 1173// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] 1174// CHECK: ret <8 x i8> [[VBSL2_I]] 1175poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { 1176 return vbsl_p8(v1, v2, v3); 1177} 1178 1179// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 { 1180// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 
1181// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1182// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 1183// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1184// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1185// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 1186// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]] 1187// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1> 1188// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] 1189// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] 1190// CHECK: ret <4 x i16> [[VBSL5_I]] 1191poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { 1192 return vbsl_p16(v1, v2, v3); 1193} 1194 1195// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 1196// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 1197// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1198// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 1199// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] 1200// CHECK: ret <16 x i8> [[VBSL2_I]] 1201int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { 1202 return vbslq_s8(v1, v2, v3); 1203} 1204 1205// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 1206// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1207// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1208// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 1209// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1210// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1211// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 1212// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] 1213// CHECK: 
[[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1214// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] 1215// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] 1216// CHECK: ret <8 x i16> [[VBSL5_I]] 1217int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { 1218 return vbslq_s16(v1, v2, v3); 1219} 1220 1221// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 1222// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1223// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1224// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 1225// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1226// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1227// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 1228// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]] 1229// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1> 1230// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] 1231// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] 1232// CHECK: ret <4 x i32> [[VBSL5_I]] 1233int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { 1234 return vbslq_s32(v1, v2, v3); 1235} 1236 1237// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 { 1238// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1239// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1240// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> 1241// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1242// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1243// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> 1244// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]] 1245// CHECK: [[TMP3:%.*]] = xor <2 x i64> 
[[VBSL_I]], <i64 -1, i64 -1> 1246// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] 1247// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 1248// CHECK: ret <2 x i64> [[VBSL5_I]] 1249int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { 1250 return vbslq_s64(v1, v2, v3); 1251} 1252 1253// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 1254// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 1255// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1256// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 1257// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] 1258// CHECK: ret <16 x i8> [[VBSL2_I]] 1259uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 1260 return vbslq_u8(v1, v2, v3); 1261} 1262 1263// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 1264// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1265// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1266// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 1267// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1268// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1269// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 1270// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] 1271// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1272// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] 1273// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] 1274// CHECK: ret <8 x i16> [[VBSL5_I]] 1275uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 1276 return vbslq_u16(v1, v2, v3); 1277} 1278 1279// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> 
%v1, <4 x i32> %v2, <4 x i32> %v3) #0 { 1280// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1281// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1282// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 1283// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1284// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1285// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 1286// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]] 1287// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1> 1288// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] 1289// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] 1290// CHECK: ret <4 x i32> [[VBSL5_I]] 1291int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { 1292 return vbslq_s32(v1, v2, v3); 1293} 1294 1295// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 { 1296// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1297// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1298// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> 1299// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1300// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1301// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> 1302// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]] 1303// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1> 1304// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] 1305// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 1306// CHECK: ret <2 x i64> [[VBSL5_I]] 1307uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { 1308 return vbslq_u64(v1, v2, v3); 1309} 1310 1311// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 { 1312// CHECK: [[TMP0:%.*]] = 
bitcast <4 x i32> %v1 to <16 x i8> 1313// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1314// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> 1315// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1316// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1317// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 1318// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]] 1319// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1> 1320// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] 1321// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] 1322// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> 1323// CHECK: ret <4 x float> [[TMP4]] 1324float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { 1325 return vbslq_f32(v1, v2, v3); 1326} 1327 1328// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 { 1329// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 1330// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1331// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 1332// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] 1333// CHECK: ret <16 x i8> [[VBSL2_I]] 1334poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { 1335 return vbslq_p8(v1, v2, v3); 1336} 1337 1338// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 { 1339// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1340// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1341// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 1342// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1343// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1344// CHECK: [[VBSL2_I:%.*]] = 
bitcast <16 x i8> [[TMP2]] to <8 x i16> 1345// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]] 1346// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1347// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] 1348// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] 1349// CHECK: ret <8 x i16> [[VBSL5_I]] 1350poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { 1351 return vbslq_p16(v1, v2, v3); 1352} 1353 1354// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 { 1355// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1356// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1357// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> 1358// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1359// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1360// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> 1361// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]] 1362// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1> 1363// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] 1364// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 1365// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> 1366// CHECK: ret <2 x double> [[TMP4]] 1367float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { 1368 return vbslq_f64(v1, v2, v3); 1369} 1370 1371// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 { 1372// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1373// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1374// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1375// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1376// CHECK: [[VRECPS_V2_I:%.*]] = call <2 
x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4 1377// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8> 1378// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float> 1379// CHECK: ret <2 x float> [[TMP2]] 1380float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { 1381 return vrecps_f32(v1, v2); 1382} 1383 1384// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1385// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1386// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1387// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1388// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1389// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4 1390// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> 1391// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float> 1392// CHECK: ret <4 x float> [[TMP2]] 1393float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { 1394 return vrecpsq_f32(v1, v2); 1395} 1396 1397// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 1398// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1399// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1400// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1401// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1402// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4 1403// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8> 1404// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double> 1405// CHECK: ret <2 x 
double> [[TMP2]] 1406float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { 1407 return vrecpsq_f64(v1, v2); 1408} 1409 1410// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 { 1411// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1412// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1413// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1414// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1415// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4 1416// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> 1417// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float> 1418// CHECK: ret <2 x float> [[TMP2]] 1419float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { 1420 return vrsqrts_f32(v1, v2); 1421} 1422 1423// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1424// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1425// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1426// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1427// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1428// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4 1429// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> 1430// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float> 1431// CHECK: ret <4 x float> [[TMP2]] 1432float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { 1433 return vrsqrtsq_f32(v1, v2); 1434} 1435 1436// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 1437// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 
1438// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1439// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1440// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1441// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4 1442// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8> 1443// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double> 1444// CHECK: ret <2 x double> [[TMP2]] 1445float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { 1446 return vrsqrtsq_f64(v1, v2); 1447} 1448 1449// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 { 1450// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1451// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1452// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1453// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1454// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4 1455// CHECK: ret <2 x i32> [[VCAGE_V2_I]] 1456uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { 1457 return vcage_f32(v1, v2); 1458} 1459 1460// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { 1461// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1462// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1463// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 1464// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 1465// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4 1466// CHECK: ret <1 x i64> [[VCAGE_V2_I]] 1467uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) 
{ 1468 return vcage_f64(a, b); 1469} 1470 1471// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1472// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1473// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1474// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1475// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1476// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4 1477// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] 1478uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { 1479 return vcageq_f32(v1, v2); 1480} 1481 1482// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 1483// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1484// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1485// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1486// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1487// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4 1488// CHECK: ret <2 x i64> [[VCAGEQ_V2_I]] 1489uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { 1490 return vcageq_f64(v1, v2); 1491} 1492 1493// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 { 1494// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1495// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1496// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1497// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1498// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4 1499// CHECK: ret <2 x i32> [[VCAGT_V2_I]] 1500uint32x2_t 
test_vcagt_f32(float32x2_t v1, float32x2_t v2) { 1501 return vcagt_f32(v1, v2); 1502} 1503 1504// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { 1505// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1506// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1507// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 1508// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 1509// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4 1510// CHECK: ret <1 x i64> [[VCAGT_V2_I]] 1511uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { 1512 return vcagt_f64(a, b); 1513} 1514 1515// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1516// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1517// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1518// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1519// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1520// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4 1521// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] 1522uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { 1523 return vcagtq_f32(v1, v2); 1524} 1525 1526// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 1527// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1528// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1529// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1530// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1531// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4 1532// CHECK: ret <2 x 
i64> [[VCAGTQ_V2_I]] 1533uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { 1534 return vcagtq_f64(v1, v2); 1535} 1536 1537// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 { 1538// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1539// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1540// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1541// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1542// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4 1543// CHECK: ret <2 x i32> [[VCALE_V2_I]] 1544uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { 1545 return vcale_f32(v1, v2); 1546 // Using registers other than v0, v1 are possible, but would be odd. 1547} 1548 1549// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { 1550// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1551// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1552// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 1553// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 1554// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4 1555// CHECK: ret <1 x i64> [[VCALE_V2_I]] 1556uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { 1557 return vcale_f64(a, b); 1558} 1559 1560// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1561// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1562// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1563// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1564// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1565// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4 1566// CHECK: ret <4 x i32> [[VCALEQ_V2_I]] 1567uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { 1568 return vcaleq_f32(v1, v2); 1569 // Using registers other than v0, v1 are possible, but would be odd. 1570} 1571 1572// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 1573// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1574// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1575// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1576// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1577// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4 1578// CHECK: ret <2 x i64> [[VCALEQ_V2_I]] 1579uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { 1580 return vcaleq_f64(v1, v2); 1581 // Using registers other than v0, v1 are possible, but would be odd. 1582} 1583 1584// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 { 1585// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1586// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1587// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1588// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1589// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4 1590// CHECK: ret <2 x i32> [[VCALT_V2_I]] 1591uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { 1592 return vcalt_f32(v1, v2); 1593 // Using registers other than v0, v1 are possible, but would be odd. 
1594} 1595 1596// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { 1597// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1598// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1599// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 1600// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 1601// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4 1602// CHECK: ret <1 x i64> [[VCALT_V2_I]] 1603uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { 1604 return vcalt_f64(a, b); 1605} 1606 1607// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1608// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1609// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1610// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1611// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1612// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4 1613// CHECK: ret <4 x i32> [[VCALTQ_V2_I]] 1614uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { 1615 return vcaltq_f32(v1, v2); 1616 // Using registers other than v0, v1 are possible, but would be odd. 
}

// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
// vcaltq (absolute compare less-than) lowers to facgt with the operands
// swapped, which is why TMP1 feeds the first intrinsic argument above.
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 are possible, but would be odd.
}

// vtst family: bitwise test. The expected IR is and + icmp ne 0 + sext,
// i.e. each lane becomes all-ones when (v1 & v2) has any bit set in that
// lane, all-zeros otherwise. Non-i8 element types round-trip through
// <8 x i8> bitcasts inserted by the intrinsic lowering.

// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}

// Unsigned variants produce identical IR to the signed ones: vtst is
// sign-agnostic, so only the C-level types differ.

// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to 
<2 x i32> 1693// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] 1694// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer 1695// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> 1696// CHECK: ret <2 x i32> [[VTST_I]] 1697uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { 1698 return vtst_u32(v1, v2); 1699} 1700 1701// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 1702// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 1703// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 1704// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 1705// CHECK: ret <16 x i8> [[VTST_I]] 1706uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { 1707 return vtstq_s8(v1, v2); 1708} 1709 1710// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 1711// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1712// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1713// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1714// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1715// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] 1716// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer 1717// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 1718// CHECK: ret <8 x i16> [[VTST_I]] 1719uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) { 1720 return vtstq_s16(v1, v2); 1721} 1722 1723// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 1724// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1725// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1726// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1727// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1728// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] 1729// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer 1730// CHECK: [[VTST_I:%.*]] = sext <4 x 
i1> [[TMP5]] to <4 x i32> 1731// CHECK: ret <4 x i32> [[VTST_I]] 1732uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { 1733 return vtstq_s32(v1, v2); 1734} 1735 1736// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 1737// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 1738// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 1739// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 1740// CHECK: ret <16 x i8> [[VTST_I]] 1741uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { 1742 return vtstq_u8(v1, v2); 1743} 1744 1745// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 1746// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1747// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1748// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1749// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1750// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] 1751// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer 1752// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 1753// CHECK: ret <8 x i16> [[VTST_I]] 1754uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) { 1755 return vtstq_u16(v1, v2); 1756} 1757 1758// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 1759// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1760// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1761// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1762// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1763// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] 1764// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer 1765// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> 1766// CHECK: ret <4 x i32> [[VTST_I]] 1767uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { 1768 return vtstq_u32(v1, v2); 1769} 1770 1771// 
CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 1772// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1773// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1774// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1775// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1776// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]] 1777// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer 1778// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> 1779// CHECK: ret <2 x i64> [[VTST_I]] 1780uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { 1781 return vtstq_s64(v1, v2); 1782} 1783 1784// CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 1785// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1786// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1787// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 1788// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1789// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]] 1790// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer 1791// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> 1792// CHECK: ret <2 x i64> [[VTST_I]] 1793uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { 1794 return vtstq_u64(v1, v2); 1795} 1796 1797// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 { 1798// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 1799// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 1800// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 1801// CHECK: ret <8 x i8> [[VTST_I]] 1802uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { 1803 return vtst_p8(v1, v2); 1804} 1805 1806// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 { 1807// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1808// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to 
<8 x i8> 1809// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1810// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1811// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] 1812// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer 1813// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> 1814// CHECK: ret <4 x i16> [[VTST_I]] 1815uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) { 1816 return vtst_p16(v1, v2); 1817} 1818 1819// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 { 1820// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 1821// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 1822// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 1823// CHECK: ret <16 x i8> [[VTST_I]] 1824uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { 1825 return vtstq_p8(v1, v2); 1826} 1827 1828// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 { 1829// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1830// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1831// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1832// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1833// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] 1834// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer 1835// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 1836// CHECK: ret <8 x i16> [[VTST_I]] 1837uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { 1838 return vtstq_p16(v1, v2); 1839} 1840 1841// CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 { 1842// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 1843// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 1844// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 1845// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 1846// CHECK: [[TMP4:%.*]] = and <1 x i64> 
[[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
// Single-lane (d-register) vtst: same and/icmp/sext pattern on <1 x i64>.
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}

// vceq family: lane-wise compare-equal. Integer and poly lanes lower to
// icmp eq, float lanes to fcmp oeq; the <N x i1> result is sign-extended
// to an all-ones / all-zeros mask of the element width.

// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// 
CHECK: ret <4 x i16> [[SEXT_I]] 1935uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) { 1936 return vceq_u16(v1, v2); 1937} 1938 1939// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 1940// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 1941// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1942// CHECK: ret <2 x i32> [[SEXT_I]] 1943uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { 1944 return vceq_u32(v1, v2); 1945} 1946 1947// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 { 1948// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 1949// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1950// CHECK: ret <8 x i8> [[SEXT_I]] 1951uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { 1952 return vceq_p8(v1, v2); 1953} 1954 1955// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 1956// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 1957// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1958// CHECK: ret <16 x i8> [[SEXT_I]] 1959uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { 1960 return vceqq_s8(v1, v2); 1961} 1962 1963// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 1964// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 1965// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1966// CHECK: ret <8 x i16> [[SEXT_I]] 1967uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { 1968 return vceqq_s16(v1, v2); 1969} 1970 1971// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 1972// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 1973// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1974// CHECK: ret <4 x i32> [[SEXT_I]] 1975uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { 1976 return vceqq_s32(v1, v2); 1977} 1978 1979// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 1980// CHECK: 
[[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2 1981// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1982// CHECK: ret <4 x i32> [[SEXT_I]] 1983uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { 1984 return vceqq_f32(v1, v2); 1985} 1986 1987// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 1988// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 1989// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1990// CHECK: ret <16 x i8> [[SEXT_I]] 1991uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { 1992 return vceqq_u8(v1, v2); 1993} 1994 1995// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 1996// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 1997// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1998// CHECK: ret <8 x i16> [[SEXT_I]] 1999uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { 2000 return vceqq_u16(v1, v2); 2001} 2002 2003// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2004// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 2005// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2006// CHECK: ret <4 x i32> [[SEXT_I]] 2007uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { 2008 return vceqq_u32(v1, v2); 2009} 2010 2011// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2012// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 2013// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2014// CHECK: ret <16 x i8> [[SEXT_I]] 2015uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { 2016 return vceqq_p8(v1, v2); 2017} 2018 2019 2020// CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2021// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 2022// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2023// CHECK: ret <2 x i64> [[SEXT_I]] 2024uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { 
2025 return vceqq_s64(v1, v2); 2026} 2027 2028// CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2029// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 2030// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2031// CHECK: ret <2 x i64> [[SEXT_I]] 2032uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { 2033 return vceqq_u64(v1, v2); 2034} 2035 2036// CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 2037// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2 2038// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2039// CHECK: ret <2 x i64> [[SEXT_I]] 2040uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { 2041 return vceqq_f64(v1, v2); 2042} 2043// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2044// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2 2045// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2046// CHECK: ret <8 x i8> [[SEXT_I]] 2047uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { 2048 return vcge_s8(v1, v2); 2049} 2050 2051// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2052// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2 2053// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2054// CHECK: ret <4 x i16> [[SEXT_I]] 2055uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { 2056 return vcge_s16(v1, v2); 2057} 2058 2059// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2060// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2 2061// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2062// CHECK: ret <2 x i32> [[SEXT_I]] 2063uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { 2064 return vcge_s32(v1, v2); 2065} 2066 2067// CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 { 2068// CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b 2069// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> 
[[CMP_I]] to <1 x i64> 2070// CHECK: ret <1 x i64> [[SEXT_I]] 2071uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { 2072 return vcge_s64(a, b); 2073} 2074 2075// CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 { 2076// CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b 2077// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2078// CHECK: ret <1 x i64> [[SEXT_I]] 2079uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) { 2080 return vcge_u64(a, b); 2081} 2082 2083// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 { 2084// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2 2085// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2086// CHECK: ret <2 x i32> [[SEXT_I]] 2087uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { 2088 return vcge_f32(v1, v2); 2089} 2090 2091// CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 { 2092// CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b 2093// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2094// CHECK: ret <1 x i64> [[SEXT_I]] 2095uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { 2096 return vcge_f64(a, b); 2097} 2098 2099// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2100// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2 2101// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2102// CHECK: ret <8 x i8> [[SEXT_I]] 2103uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { 2104 return vcge_u8(v1, v2); 2105} 2106 2107// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2108// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2 2109// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2110// CHECK: ret <4 x i16> [[SEXT_I]] 2111uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { 2112 return vcge_u16(v1, v2); 2113} 2114 2115// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> 
%v2) #0 { 2116// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2 2117// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2118// CHECK: ret <2 x i32> [[SEXT_I]] 2119uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { 2120 return vcge_u32(v1, v2); 2121} 2122 2123// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2124// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2 2125// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2126// CHECK: ret <16 x i8> [[SEXT_I]] 2127uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { 2128 return vcgeq_s8(v1, v2); 2129} 2130 2131// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2132// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2 2133// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2134// CHECK: ret <8 x i16> [[SEXT_I]] 2135uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { 2136 return vcgeq_s16(v1, v2); 2137} 2138 2139// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2140// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2 2141// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2142// CHECK: ret <4 x i32> [[SEXT_I]] 2143uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { 2144 return vcgeq_s32(v1, v2); 2145} 2146 2147// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 2148// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2 2149// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2150// CHECK: ret <4 x i32> [[SEXT_I]] 2151uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { 2152 return vcgeq_f32(v1, v2); 2153} 2154 2155// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2156// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2 2157// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2158// CHECK: ret <16 x i8> [[SEXT_I]] 2159uint8x16_t test_vcgeq_u8(uint8x16_t 
v1, uint8x16_t v2) { 2160 return vcgeq_u8(v1, v2); 2161} 2162 2163// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2164// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2 2165// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2166// CHECK: ret <8 x i16> [[SEXT_I]] 2167uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { 2168 return vcgeq_u16(v1, v2); 2169} 2170 2171// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2172// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2 2173// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2174// CHECK: ret <4 x i32> [[SEXT_I]] 2175uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { 2176 return vcgeq_u32(v1, v2); 2177} 2178 2179// CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2180// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2 2181// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2182// CHECK: ret <2 x i64> [[SEXT_I]] 2183uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { 2184 return vcgeq_s64(v1, v2); 2185} 2186 2187// CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2188// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2 2189// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2190// CHECK: ret <2 x i64> [[SEXT_I]] 2191uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { 2192 return vcgeq_u64(v1, v2); 2193} 2194 2195// CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 2196// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2 2197// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2198// CHECK: ret <2 x i64> [[SEXT_I]] 2199uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { 2200 return vcgeq_f64(v1, v2); 2201} 2202 2203// Notes about vcle: 2204// LE condition predicate implemented as GE, so check reversed operands. 
2205// Using registers other than v0, v1 are possible, but would be odd. 2206// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2207// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2 2208// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2209// CHECK: ret <8 x i8> [[SEXT_I]] 2210uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) { 2211 return vcle_s8(v1, v2); 2212} 2213 2214// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2215// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2 2216// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2217// CHECK: ret <4 x i16> [[SEXT_I]] 2218uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) { 2219 return vcle_s16(v1, v2); 2220} 2221 2222// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2223// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2 2224// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2225// CHECK: ret <2 x i32> [[SEXT_I]] 2226uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { 2227 return vcle_s32(v1, v2); 2228} 2229 2230// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 { 2231// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b 2232// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2233// CHECK: ret <1 x i64> [[SEXT_I]] 2234uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) { 2235 return vcle_s64(a, b); 2236} 2237 2238// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 { 2239// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b 2240// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2241// CHECK: ret <1 x i64> [[SEXT_I]] 2242uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) { 2243 return vcle_u64(a, b); 2244} 2245 2246// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 { 2247// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2 2248// CHECK: [[SEXT_I:%.*]] = sext <2 x 
i1> [[CMP_I]] to <2 x i32> 2249// CHECK: ret <2 x i32> [[SEXT_I]] 2250uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { 2251 return vcle_f32(v1, v2); 2252} 2253 2254// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 { 2255// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b 2256// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2257// CHECK: ret <1 x i64> [[SEXT_I]] 2258uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) { 2259 return vcle_f64(a, b); 2260} 2261 2262// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2263// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2 2264// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2265// CHECK: ret <8 x i8> [[SEXT_I]] 2266uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { 2267 return vcle_u8(v1, v2); 2268} 2269 2270// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2271// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2 2272// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2273// CHECK: ret <4 x i16> [[SEXT_I]] 2274uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) { 2275 return vcle_u16(v1, v2); 2276} 2277 2278// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2279// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2 2280// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2281// CHECK: ret <2 x i32> [[SEXT_I]] 2282uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) { 2283 return vcle_u32(v1, v2); 2284} 2285 2286// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2287// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2 2288// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2289// CHECK: ret <16 x i8> [[SEXT_I]] 2290uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) { 2291 return vcleq_s8(v1, v2); 2292} 2293 2294// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, 
<8 x i16> %v2) #0 { 2295// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2 2296// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2297// CHECK: ret <8 x i16> [[SEXT_I]] 2298uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) { 2299 return vcleq_s16(v1, v2); 2300} 2301 2302// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2303// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2 2304// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2305// CHECK: ret <4 x i32> [[SEXT_I]] 2306uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) { 2307 return vcleq_s32(v1, v2); 2308} 2309 2310// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 2311// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2 2312// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2313// CHECK: ret <4 x i32> [[SEXT_I]] 2314uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) { 2315 return vcleq_f32(v1, v2); 2316} 2317 2318// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2319// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2 2320// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2321// CHECK: ret <16 x i8> [[SEXT_I]] 2322uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) { 2323 return vcleq_u8(v1, v2); 2324} 2325 2326// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2327// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2 2328// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2329// CHECK: ret <8 x i16> [[SEXT_I]] 2330uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) { 2331 return vcleq_u16(v1, v2); 2332} 2333 2334// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2335// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2 2336// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2337// CHECK: ret <4 x i32> [[SEXT_I]] 2338uint32x4_t 
test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) { 2339 return vcleq_u32(v1, v2); 2340} 2341 2342// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2343// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2 2344// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2345// CHECK: ret <2 x i64> [[SEXT_I]] 2346uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) { 2347 return vcleq_s64(v1, v2); 2348} 2349 2350// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2351// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2 2352// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2353// CHECK: ret <2 x i64> [[SEXT_I]] 2354uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) { 2355 return vcleq_u64(v1, v2); 2356} 2357 2358// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 2359// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2 2360// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2361// CHECK: ret <2 x i64> [[SEXT_I]] 2362uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) { 2363 return vcleq_f64(v1, v2); 2364} 2365 2366 2367// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2368// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2 2369// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2370// CHECK: ret <8 x i8> [[SEXT_I]] 2371uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) { 2372 return vcgt_s8(v1, v2); 2373} 2374 2375// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2376// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2 2377// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2378// CHECK: ret <4 x i16> [[SEXT_I]] 2379uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) { 2380 return vcgt_s16(v1, v2); 2381} 2382 2383// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2384// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x 
i32> %v1, %v2 2385// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2386// CHECK: ret <2 x i32> [[SEXT_I]] 2387uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { 2388 return vcgt_s32(v1, v2); 2389} 2390 2391// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 { 2392// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b 2393// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2394// CHECK: ret <1 x i64> [[SEXT_I]] 2395uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { 2396 return vcgt_s64(a, b); 2397} 2398 2399// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 { 2400// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b 2401// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2402// CHECK: ret <1 x i64> [[SEXT_I]] 2403uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { 2404 return vcgt_u64(a, b); 2405} 2406 2407// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 { 2408// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2 2409// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2410// CHECK: ret <2 x i32> [[SEXT_I]] 2411uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { 2412 return vcgt_f32(v1, v2); 2413} 2414 2415// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 { 2416// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b 2417// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2418// CHECK: ret <1 x i64> [[SEXT_I]] 2419uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { 2420 return vcgt_f64(a, b); 2421} 2422 2423// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2424// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2 2425// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2426// CHECK: ret <8 x i8> [[SEXT_I]] 2427uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { 2428 return vcgt_u8(v1, v2); 2429} 2430 2431// CHECK-LABEL: define 
<4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2432// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2 2433// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2434// CHECK: ret <4 x i16> [[SEXT_I]] 2435uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) { 2436 return vcgt_u16(v1, v2); 2437} 2438 2439// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2440// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2 2441// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2442// CHECK: ret <2 x i32> [[SEXT_I]] 2443uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) { 2444 return vcgt_u32(v1, v2); 2445} 2446 2447// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2448// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2 2449// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2450// CHECK: ret <16 x i8> [[SEXT_I]] 2451uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) { 2452 return vcgtq_s8(v1, v2); 2453} 2454 2455// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2456// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2 2457// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2458// CHECK: ret <8 x i16> [[SEXT_I]] 2459uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) { 2460 return vcgtq_s16(v1, v2); 2461} 2462 2463// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2464// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2 2465// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2466// CHECK: ret <4 x i32> [[SEXT_I]] 2467uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) { 2468 return vcgtq_s32(v1, v2); 2469} 2470 2471// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 2472// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2 2473// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2474// CHECK: ret <4 x i32> 
[[SEXT_I]] 2475uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { 2476 return vcgtq_f32(v1, v2); 2477} 2478 2479// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2480// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2 2481// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2482// CHECK: ret <16 x i8> [[SEXT_I]] 2483uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) { 2484 return vcgtq_u8(v1, v2); 2485} 2486 2487// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2488// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2 2489// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2490// CHECK: ret <8 x i16> [[SEXT_I]] 2491uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) { 2492 return vcgtq_u16(v1, v2); 2493} 2494 2495// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2496// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2 2497// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2498// CHECK: ret <4 x i32> [[SEXT_I]] 2499uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) { 2500 return vcgtq_u32(v1, v2); 2501} 2502 2503// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2504// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2 2505// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2506// CHECK: ret <2 x i64> [[SEXT_I]] 2507uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) { 2508 return vcgtq_s64(v1, v2); 2509} 2510 2511// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2512// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2 2513// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2514// CHECK: ret <2 x i64> [[SEXT_I]] 2515uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) { 2516 return vcgtq_u64(v1, v2); 2517} 2518 2519// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 2520// 
CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2 2521// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2522// CHECK: ret <2 x i64> [[SEXT_I]] 2523uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { 2524 return vcgtq_f64(v1, v2); 2525} 2526 2527 2528// Notes about vclt: 2529// LT condition predicate implemented as GT, so check reversed operands. 2530// Using registers other than v0, v1 are possible, but would be odd. 2531 2532// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2533// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2 2534// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2535// CHECK: ret <8 x i8> [[SEXT_I]] 2536uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) { 2537 return vclt_s8(v1, v2); 2538} 2539 2540// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2541// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2 2542// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2543// CHECK: ret <4 x i16> [[SEXT_I]] 2544uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) { 2545 return vclt_s16(v1, v2); 2546} 2547 2548// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2549// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2 2550// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2551// CHECK: ret <2 x i32> [[SEXT_I]] 2552uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { 2553 return vclt_s32(v1, v2); 2554} 2555 2556// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 { 2557// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b 2558// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2559// CHECK: ret <1 x i64> [[SEXT_I]] 2560uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) { 2561 return vclt_s64(a, b); 2562} 2563 2564// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 { 2565// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b 2566// CHECK: 
[[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2567// CHECK: ret <1 x i64> [[SEXT_I]] 2568uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) { 2569 return vclt_u64(a, b); 2570} 2571 2572// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 { 2573// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2 2574// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2575// CHECK: ret <2 x i32> [[SEXT_I]] 2576uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { 2577 return vclt_f32(v1, v2); 2578} 2579 2580// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 { 2581// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b 2582// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2583// CHECK: ret <1 x i64> [[SEXT_I]] 2584uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) { 2585 return vclt_f64(a, b); 2586} 2587 2588// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2589// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2 2590// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2591// CHECK: ret <8 x i8> [[SEXT_I]] 2592uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { 2593 return vclt_u8(v1, v2); 2594} 2595 2596// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2597// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2 2598// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2599// CHECK: ret <4 x i16> [[SEXT_I]] 2600uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) { 2601 return vclt_u16(v1, v2); 2602} 2603 2604// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2605// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2 2606// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2607// CHECK: ret <2 x i32> [[SEXT_I]] 2608uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) { 2609 return vclt_u32(v1, v2); 2610} 2611 2612// CHECK-LABEL: define <16 x i8> 
@test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2613// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2 2614// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2615// CHECK: ret <16 x i8> [[SEXT_I]] 2616uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) { 2617 return vcltq_s8(v1, v2); 2618} 2619 2620// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2621// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2 2622// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2623// CHECK: ret <8 x i16> [[SEXT_I]] 2624uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) { 2625 return vcltq_s16(v1, v2); 2626} 2627 2628// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2629// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2 2630// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2631// CHECK: ret <4 x i32> [[SEXT_I]] 2632uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) { 2633 return vcltq_s32(v1, v2); 2634} 2635 2636// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 { 2637// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2 2638// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2639// CHECK: ret <4 x i32> [[SEXT_I]] 2640uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) { 2641 return vcltq_f32(v1, v2); 2642} 2643 2644// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2645// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2 2646// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2647// CHECK: ret <16 x i8> [[SEXT_I]] 2648uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) { 2649 return vcltq_u8(v1, v2); 2650} 2651 2652// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2653// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2 2654// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2655// CHECK: ret <8 x i16> [[SEXT_I]] 
2656uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) { 2657 return vcltq_u16(v1, v2); 2658} 2659 2660// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2661// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2 2662// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2663// CHECK: ret <4 x i32> [[SEXT_I]] 2664uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) { 2665 return vcltq_u32(v1, v2); 2666} 2667 2668// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2669// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2 2670// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2671// CHECK: ret <2 x i64> [[SEXT_I]] 2672uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) { 2673 return vcltq_s64(v1, v2); 2674} 2675 2676// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 { 2677// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2 2678// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2679// CHECK: ret <2 x i64> [[SEXT_I]] 2680uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) { 2681 return vcltq_u64(v1, v2); 2682} 2683 2684// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 { 2685// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2 2686// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2687// CHECK: ret <2 x i64> [[SEXT_I]] 2688uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { 2689 return vcltq_f64(v1, v2); 2690} 2691 2692 2693// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2694// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 2695// CHECK: ret <8 x i8> [[VHADD_V_I]] 2696int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { 2697 return vhadd_s8(v1, v2); 2698} 2699 2700// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2701// CHECK: [[TMP0:%.*]] = 
bitcast <4 x i16> %v1 to <8 x i8> 2702// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2703// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2704// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2705// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4 2706// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> 2707// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> 2708// CHECK: ret <4 x i16> [[TMP2]] 2709int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { 2710 return vhadd_s16(v1, v2); 2711} 2712 2713// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2714// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2715// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2716// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2717// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2718// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4 2719// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> 2720// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> 2721// CHECK: ret <2 x i32> [[TMP2]] 2722int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { 2723 return vhadd_s32(v1, v2); 2724} 2725 2726// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2727// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 2728// CHECK: ret <8 x i8> [[VHADD_V_I]] 2729uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { 2730 return vhadd_u8(v1, v2); 2731} 2732 2733// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2734// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2735// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 
x i8> 2736// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2737// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2738// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4 2739// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> 2740// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> 2741// CHECK: ret <4 x i16> [[TMP2]] 2742uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { 2743 return vhadd_u16(v1, v2); 2744} 2745 2746// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2747// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2748// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2749// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2750// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2751// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4 2752// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> 2753// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> 2754// CHECK: ret <2 x i32> [[TMP2]] 2755uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { 2756 return vhadd_u32(v1, v2); 2757} 2758 2759// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2760// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 2761// CHECK: ret <16 x i8> [[VHADDQ_V_I]] 2762int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { 2763 return vhaddq_s8(v1, v2); 2764} 2765 2766// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2767// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2768// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2769// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] 
to <8 x i16> 2770// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 2771// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4 2772// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> 2773// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> 2774// CHECK: ret <8 x i16> [[TMP2]] 2775int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { 2776 return vhaddq_s16(v1, v2); 2777} 2778 2779// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2780// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2781// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2782// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2783// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 2784// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4 2785// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> 2786// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> 2787// CHECK: ret <4 x i32> [[TMP2]] 2788int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { 2789 return vhaddq_s32(v1, v2); 2790} 2791 2792// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2793// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 2794// CHECK: ret <16 x i8> [[VHADDQ_V_I]] 2795uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { 2796 return vhaddq_u8(v1, v2); 2797} 2798 2799// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2800// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2801// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2802// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2803// CHECK: 
[[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 2804// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4 2805// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> 2806// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> 2807// CHECK: ret <8 x i16> [[TMP2]] 2808uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { 2809 return vhaddq_u16(v1, v2); 2810} 2811 2812// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2813// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2814// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2815// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2816// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 2817// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4 2818// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> 2819// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> 2820// CHECK: ret <4 x i32> [[TMP2]] 2821uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { 2822 return vhaddq_u32(v1, v2); 2823} 2824 2825 2826// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2827// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 2828// CHECK: ret <8 x i8> [[VHSUB_V_I]] 2829int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { 2830 return vhsub_s8(v1, v2); 2831} 2832 2833// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2834// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2835// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2836// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2837// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 
x i16> 2838// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4 2839// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> 2840// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16> 2841// CHECK: ret <4 x i16> [[TMP2]] 2842int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { 2843 return vhsub_s16(v1, v2); 2844} 2845 2846// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2847// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2848// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2849// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2850// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2851// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4 2852// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> 2853// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32> 2854// CHECK: ret <2 x i32> [[TMP2]] 2855int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { 2856 return vhsub_s32(v1, v2); 2857} 2858 2859// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2860// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 2861// CHECK: ret <8 x i8> [[VHSUB_V_I]] 2862uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { 2863 return vhsub_u8(v1, v2); 2864} 2865 2866// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2867// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2868// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2869// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2870// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2871// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 
x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4 2872// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> 2873// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16> 2874// CHECK: ret <4 x i16> [[TMP2]] 2875uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { 2876 return vhsub_u16(v1, v2); 2877} 2878 2879// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2880// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2881// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2882// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2883// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2884// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4 2885// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> 2886// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32> 2887// CHECK: ret <2 x i32> [[TMP2]] 2888uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { 2889 return vhsub_u32(v1, v2); 2890} 2891 2892// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2893// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 2894// CHECK: ret <16 x i8> [[VHSUBQ_V_I]] 2895int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { 2896 return vhsubq_s8(v1, v2); 2897} 2898 2899// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2900// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2901// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2902// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2903// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 2904// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4 2905// 
CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> 2906// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16> 2907// CHECK: ret <8 x i16> [[TMP2]] 2908int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { 2909 return vhsubq_s16(v1, v2); 2910} 2911 2912// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2913// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2914// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2915// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2916// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 2917// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4 2918// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> 2919// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32> 2920// CHECK: ret <4 x i32> [[TMP2]] 2921int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { 2922 return vhsubq_s32(v1, v2); 2923} 2924 2925// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 2926// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 2927// CHECK: ret <16 x i8> [[VHSUBQ_V_I]] 2928uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { 2929 return vhsubq_u8(v1, v2); 2930} 2931 2932// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 2933// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2934// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2935// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2936// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 2937// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4 2938// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 
x i16> [[VHSUBQ_V2_I]] to <16 x i8> 2939// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16> 2940// CHECK: ret <8 x i16> [[TMP2]] 2941uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { 2942 return vhsubq_u16(v1, v2); 2943} 2944 2945// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 2946// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2947// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2948// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2949// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 2950// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4 2951// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> 2952// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32> 2953// CHECK: ret <4 x i32> [[TMP2]] 2954uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { 2955 return vhsubq_u32(v1, v2); 2956} 2957 2958 2959// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2960// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 2961// CHECK: ret <8 x i8> [[VRHADD_V_I]] 2962int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { 2963 return vrhadd_s8(v1, v2); 2964} 2965 2966// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 { 2967// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2968// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2969// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2970// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2971// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4 2972// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> 2973// 
CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16> 2974// CHECK: ret <4 x i16> [[TMP2]] 2975int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { 2976 return vrhadd_s16(v1, v2); 2977} 2978 2979// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 { 2980// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2981// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2982// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2983// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2984// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4 2985// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> 2986// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> 2987// CHECK: ret <2 x i32> [[TMP2]] 2988int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { 2989 return vrhadd_s32(v1, v2); 2990} 2991 2992// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 { 2993// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4 2994// CHECK: ret <8 x i8> [[VRHADD_V_I]] 2995uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { 2996 return vrhadd_u8(v1, v2); 2997} 2998 2999// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 { 3000// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 3001// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 3002// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3003// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3004// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4 3005// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> 3006// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 
x i16> 3007// CHECK: ret <4 x i16> [[TMP2]] 3008uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { 3009 return vrhadd_u16(v1, v2); 3010} 3011 3012// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 { 3013// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 3014// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 3015// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3016// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3017// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4 3018// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> 3019// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> 3020// CHECK: ret <2 x i32> [[TMP2]] 3021uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { 3022 return vrhadd_u32(v1, v2); 3023} 3024 3025// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 { 3026// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 3027// CHECK: ret <16 x i8> [[VRHADDQ_V_I]] 3028int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { 3029 return vrhaddq_s8(v1, v2); 3030} 3031 3032// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 { 3033// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 3034// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 3035// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3036// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3037// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4 3038// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> 3039// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> 3040// CHECK: ret 
<8 x i16> [[TMP2]] 3041int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { 3042 return vrhaddq_s16(v1, v2); 3043} 3044 3045// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 { 3046// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 3047// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 3048// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3049// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3050// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4 3051// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> 3052// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> 3053// CHECK: ret <4 x i32> [[TMP2]] 3054int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { 3055 return vrhaddq_s32(v1, v2); 3056} 3057 3058// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 { 3059// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4 3060// CHECK: ret <16 x i8> [[VRHADDQ_V_I]] 3061uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { 3062 return vrhaddq_u8(v1, v2); 3063} 3064 3065// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 { 3066// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 3067// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 3068// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3069// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3070// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4 3071// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> 3072// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> 3073// CHECK: ret <8 x i16> 
[[TMP2]] 3074uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { 3075 return vrhaddq_u16(v1, v2); 3076} 3077 3078// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 { 3079// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 3080// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 3081// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3082// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3083// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4 3084// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> 3085// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> 3086// CHECK: ret <4 x i32> [[TMP2]] 3087uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { 3088 return vrhaddq_u32(v1, v2); 3089} 3090// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3091// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3092// CHECK: ret <8 x i8> [[VQADD_V_I]] 3093int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { 3094 return vqadd_s8(a, b); 3095} 3096 3097// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3098// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3099// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3100// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3101// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3102// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4 3103// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> 3104// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16> 3105// CHECK: ret <4 x i16> [[TMP2]] 3106int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { 3107 
return vqadd_s16(a, b); 3108} 3109 3110// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3111// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3112// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3113// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3114// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3115// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4 3116// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> 3117// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32> 3118// CHECK: ret <2 x i32> [[TMP2]] 3119int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { 3120 return vqadd_s32(a, b); 3121} 3122 3123// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3124// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3125// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3126// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3127// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3128// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4 3129// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> 3130// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64> 3131// CHECK: ret <1 x i64> [[TMP2]] 3132int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { 3133 return vqadd_s64(a, b); 3134} 3135 3136// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3137// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3138// CHECK: ret <8 x i8> [[VQADD_V_I]] 3139uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { 3140 return vqadd_u8(a, b); 3141} 3142 3143// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> 
%b) #0 { 3144// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3145// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3146// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3147// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3148// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4 3149// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> 3150// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16> 3151// CHECK: ret <4 x i16> [[TMP2]] 3152uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { 3153 return vqadd_u16(a, b); 3154} 3155 3156// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3157// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3158// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3159// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3160// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3161// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4 3162// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> 3163// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32> 3164// CHECK: ret <2 x i32> [[TMP2]] 3165uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { 3166 return vqadd_u32(a, b); 3167} 3168 3169// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3170// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3171// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3172// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3173// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3174// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4 3175// CHECK: 
[[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> 3176// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64> 3177// CHECK: ret <1 x i64> [[TMP2]] 3178uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { 3179 return vqadd_u64(a, b); 3180} 3181 3182// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3183// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3184// CHECK: ret <16 x i8> [[VQADDQ_V_I]] 3185int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { 3186 return vqaddq_s8(a, b); 3187} 3188 3189// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3190// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3191// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3192// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3193// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3194// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4 3195// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> 3196// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16> 3197// CHECK: ret <8 x i16> [[TMP2]] 3198int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { 3199 return vqaddq_s16(a, b); 3200} 3201 3202// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3203// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3204// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3205// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3206// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3207// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4 3208// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> 
3209// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32> 3210// CHECK: ret <4 x i32> [[TMP2]] 3211int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { 3212 return vqaddq_s32(a, b); 3213} 3214 3215// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3216// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3217// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3218// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3219// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3220// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4 3221// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> 3222// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64> 3223// CHECK: ret <2 x i64> [[TMP2]] 3224int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { 3225 return vqaddq_s64(a, b); 3226} 3227 3228// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3229// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3230// CHECK: ret <16 x i8> [[VQADDQ_V_I]] 3231uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { 3232 return vqaddq_u8(a, b); 3233} 3234 3235// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3236// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3237// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3238// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3239// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3240// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4 3241// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> 3242// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] 
to <8 x i16> 3243// CHECK: ret <8 x i16> [[TMP2]] 3244uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { 3245 return vqaddq_u16(a, b); 3246} 3247 3248// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3249// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3250// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3251// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3252// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3253// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4 3254// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> 3255// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32> 3256// CHECK: ret <4 x i32> [[TMP2]] 3257uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { 3258 return vqaddq_u32(a, b); 3259} 3260 3261// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3262// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3263// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3264// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3265// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3266// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4 3267// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> 3268// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64> 3269// CHECK: ret <2 x i64> [[TMP2]] 3270uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { 3271 return vqaddq_u64(a, b); 3272} 3273 3274 3275// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3276// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3277// CHECK: ret <8 x i8> [[VQSUB_V_I]] 3278int8x8_t 
test_vqsub_s8(int8x8_t a, int8x8_t b) { 3279 return vqsub_s8(a, b); 3280} 3281 3282// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3283// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3284// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3285// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3286// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3287// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4 3288// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> 3289// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16> 3290// CHECK: ret <4 x i16> [[TMP2]] 3291int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { 3292 return vqsub_s16(a, b); 3293} 3294 3295// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3296// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3297// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3298// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3299// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3300// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4 3301// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> 3302// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32> 3303// CHECK: ret <2 x i32> [[TMP2]] 3304int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { 3305 return vqsub_s32(a, b); 3306} 3307 3308// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3309// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3310// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3311// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3312// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 
3313// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 3314// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> 3315// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> 3316// CHECK: ret <1 x i64> [[TMP2]] 3317int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { 3318 return vqsub_s64(a, b); 3319} 3320 3321// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3322// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3323// CHECK: ret <8 x i8> [[VQSUB_V_I]] 3324uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { 3325 return vqsub_u8(a, b); 3326} 3327 3328// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3329// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3330// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3331// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3332// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3333// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4 3334// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> 3335// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16> 3336// CHECK: ret <4 x i16> [[TMP2]] 3337uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { 3338 return vqsub_u16(a, b); 3339} 3340 3341// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3342// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3343// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3344// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3345// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3346// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x 
i32> [[VQSUB_V1_I]]) #4 3347// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> 3348// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32> 3349// CHECK: ret <2 x i32> [[TMP2]] 3350uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { 3351 return vqsub_u32(a, b); 3352} 3353 3354// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3355// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3356// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3357// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3358// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3359// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 3360// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> 3361// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> 3362// CHECK: ret <1 x i64> [[TMP2]] 3363uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { 3364 return vqsub_u64(a, b); 3365} 3366 3367// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3368// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3369// CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 3370int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { 3371 return vqsubq_s8(a, b); 3372} 3373 3374// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3375// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3376// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3377// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3378// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3379// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 3380// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> 
[[VQSUBQ_V2_I]] to <16 x i8> 3381// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> 3382// CHECK: ret <8 x i16> [[TMP2]] 3383int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { 3384 return vqsubq_s16(a, b); 3385} 3386 3387// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3388// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3389// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3390// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3391// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3392// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 3393// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 3394// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> 3395// CHECK: ret <4 x i32> [[TMP2]] 3396int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { 3397 return vqsubq_s32(a, b); 3398} 3399 3400// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3401// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3402// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3403// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3404// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3405// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 3406// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 3407// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> 3408// CHECK: ret <2 x i64> [[TMP2]] 3409int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { 3410 return vqsubq_s64(a, b); 3411} 3412 3413// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3414// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3415// CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 3416uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { 3417 return vqsubq_u8(a, b); 3418} 3419 3420// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3421// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3422// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3423// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3424// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3425// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 3426// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> 3427// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> 3428// CHECK: ret <8 x i16> [[TMP2]] 3429uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { 3430 return vqsubq_u16(a, b); 3431} 3432 3433// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3434// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3435// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3436// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3437// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3438// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 3439// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 3440// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> 3441// CHECK: ret <4 x i32> [[TMP2]] 3442uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { 3443 return vqsubq_u32(a, b); 3444} 3445 3446// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3447// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3448// CHECK: [[TMP1:%.*]] = bitcast <2 x 
i64> %b to <16 x i8> 3449// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3450// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3451// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 3452// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 3453// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> 3454// CHECK: ret <2 x i64> [[TMP2]] 3455uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { 3456 return vqsubq_u64(a, b); 3457} 3458 3459 3460// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3461// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3462// CHECK: ret <8 x i8> [[VSHL_V_I]] 3463int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { 3464 return vshl_s8(a, b); 3465} 3466 3467// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3468// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3469// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3470// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3471// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3472// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 3473// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> 3474// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> 3475// CHECK: ret <4 x i16> [[TMP2]] 3476int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { 3477 return vshl_s16(a, b); 3478} 3479 3480// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3481// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3482// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3483// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3484// CHECK: 
[[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3485// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 3486// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> 3487// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> 3488// CHECK: ret <2 x i32> [[TMP2]] 3489int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { 3490 return vshl_s32(a, b); 3491} 3492 3493// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3494// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3495// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3496// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3497// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3498// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 3499// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> 3500// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> 3501// CHECK: ret <1 x i64> [[TMP2]] 3502int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { 3503 return vshl_s64(a, b); 3504} 3505 3506// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3507// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3508// CHECK: ret <8 x i8> [[VSHL_V_I]] 3509uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { 3510 return vshl_u8(a, b); 3511} 3512 3513// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3514// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3515// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3516// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3517// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3518// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x 
i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 3519// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> 3520// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> 3521// CHECK: ret <4 x i16> [[TMP2]] 3522uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { 3523 return vshl_u16(a, b); 3524} 3525 3526// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3527// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3528// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3529// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3530// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3531// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 3532// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> 3533// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> 3534// CHECK: ret <2 x i32> [[TMP2]] 3535uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { 3536 return vshl_u32(a, b); 3537} 3538 3539// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3540// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3541// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3542// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3543// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3544// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 3545// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> 3546// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> 3547// CHECK: ret <1 x i64> [[TMP2]] 3548uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { 3549 return vshl_u64(a, b); 3550} 3551 3552// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3553// CHECK: [[VSHLQ_V_I:%.*]] = call <16 
x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3554// CHECK: ret <16 x i8> [[VSHLQ_V_I]] 3555int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { 3556 return vshlq_s8(a, b); 3557} 3558 3559// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3560// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3561// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3562// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3563// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3564// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 3565// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> 3566// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> 3567// CHECK: ret <8 x i16> [[TMP2]] 3568int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { 3569 return vshlq_s16(a, b); 3570} 3571 3572// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3573// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3574// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3575// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3576// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3577// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4 3578// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> 3579// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32> 3580// CHECK: ret <4 x i32> [[TMP2]] 3581int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { 3582 return vshlq_s32(a, b); 3583} 3584 3585// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3586// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3587// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3588// CHECK: 
[[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3589// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3590// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4 3591// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> 3592// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64> 3593// CHECK: ret <2 x i64> [[TMP2]] 3594int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { 3595 return vshlq_s64(a, b); 3596} 3597 3598// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3599// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3600// CHECK: ret <16 x i8> [[VSHLQ_V_I]] 3601uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { 3602 return vshlq_u8(a, b); 3603} 3604 3605// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3606// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3607// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3608// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3609// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3610// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 3611// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> 3612// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> 3613// CHECK: ret <8 x i16> [[TMP2]] 3614uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { 3615 return vshlq_u16(a, b); 3616} 3617 3618// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3619// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3620// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3621// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3622// CHECK: [[VSHLQ_V1_I:%.*]] 
= bitcast <16 x i8> [[TMP1]] to <4 x i32> 3623// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4 3624// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> 3625// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32> 3626// CHECK: ret <4 x i32> [[TMP2]] 3627uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { 3628 return vshlq_u32(a, b); 3629} 3630 3631// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3632// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3633// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3634// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3635// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3636// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4 3637// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> 3638// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64> 3639// CHECK: ret <2 x i64> [[TMP2]] 3640uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { 3641 return vshlq_u64(a, b); 3642} 3643 3644 3645// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3646// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3647// CHECK: ret <8 x i8> [[VQSHL_V_I]] 3648int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { 3649 return vqshl_s8(a, b); 3650} 3651 3652// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3653// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3654// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3655// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3656// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3657// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4 3658// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> 3659// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16> 3660// CHECK: ret <4 x i16> [[TMP2]] 3661int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { 3662 return vqshl_s16(a, b); 3663} 3664 3665// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3666// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3667// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3668// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3669// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3670// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4 3671// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> 3672// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32> 3673// CHECK: ret <2 x i32> [[TMP2]] 3674int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { 3675 return vqshl_s32(a, b); 3676} 3677 3678// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3679// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3680// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3681// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3682// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3683// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4 3684// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> 3685// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64> 3686// CHECK: ret <1 x i64> [[TMP2]] 3687int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { 3688 return vqshl_s64(a, b); 3689} 3690 3691// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 
x i8> %b) #0 { 3692// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3693// CHECK: ret <8 x i8> [[VQSHL_V_I]] 3694uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { 3695 return vqshl_u8(a, b); 3696} 3697 3698// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3699// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3700// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3701// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3702// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3703// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4 3704// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> 3705// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16> 3706// CHECK: ret <4 x i16> [[TMP2]] 3707uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { 3708 return vqshl_u16(a, b); 3709} 3710 3711// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3712// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3713// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3714// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3715// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3716// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4 3717// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> 3718// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32> 3719// CHECK: ret <2 x i32> [[TMP2]] 3720uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { 3721 return vqshl_u32(a, b); 3722} 3723 3724// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3725// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3726// CHECK: [[TMP1:%.*]] = 
bitcast <1 x i64> %b to <8 x i8> 3727// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3728// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3729// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4 3730// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> 3731// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64> 3732// CHECK: ret <1 x i64> [[TMP2]] 3733uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { 3734 return vqshl_u64(a, b); 3735} 3736 3737// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3738// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3739// CHECK: ret <16 x i8> [[VQSHLQ_V_I]] 3740int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { 3741 return vqshlq_s8(a, b); 3742} 3743 3744// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3745// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3746// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3747// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3748// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3749// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4 3750// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> 3751// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16> 3752// CHECK: ret <8 x i16> [[TMP2]] 3753int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { 3754 return vqshlq_s16(a, b); 3755} 3756 3757// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3758// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3759// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3760// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 
x i8> [[TMP0]] to <4 x i32> 3761// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3762// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4 3763// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> 3764// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32> 3765// CHECK: ret <4 x i32> [[TMP2]] 3766int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { 3767 return vqshlq_s32(a, b); 3768} 3769 3770// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3771// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3772// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3773// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3774// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3775// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4 3776// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> 3777// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64> 3778// CHECK: ret <2 x i64> [[TMP2]] 3779int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { 3780 return vqshlq_s64(a, b); 3781} 3782 3783// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3784// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3785// CHECK: ret <16 x i8> [[VQSHLQ_V_I]] 3786uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { 3787 return vqshlq_u8(a, b); 3788} 3789 3790// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3791// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3792// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3793// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3794// CHECK: [[VQSHLQ_V1_I:%.*]] = 
bitcast <16 x i8> [[TMP1]] to <8 x i16> 3795// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4 3796// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> 3797// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16> 3798// CHECK: ret <8 x i16> [[TMP2]] 3799uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { 3800 return vqshlq_u16(a, b); 3801} 3802 3803// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3804// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3805// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3806// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3807// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3808// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4 3809// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> 3810// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32> 3811// CHECK: ret <4 x i32> [[TMP2]] 3812uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { 3813 return vqshlq_u32(a, b); 3814} 3815 3816// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3817// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3818// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3819// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3820// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3821// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4 3822// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> 3823// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64> 3824// CHECK: ret <2 x i64> [[TMP2]] 3825uint64x2_t 
test_vqshlq_u64(uint64x2_t a, int64x2_t b) { 3826 return vqshlq_u64(a, b); 3827} 3828 3829// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 3830// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3831// CHECK: ret <8 x i8> [[VRSHL_V_I]] 3832int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { 3833 return vrshl_s8(a, b); 3834} 3835 3836// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 3837// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3838// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3839// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3840// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3841// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4 3842// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> 3843// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16> 3844// CHECK: ret <4 x i16> [[TMP2]] 3845int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { 3846 return vrshl_s16(a, b); 3847} 3848 3849// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 3850// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3851// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3852// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3853// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3854// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4 3855// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> 3856// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32> 3857// CHECK: ret <2 x i32> [[TMP2]] 3858int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { 3859 return vrshl_s32(a, b); 3860} 3861 3862// CHECK-LABEL: define <1 x 
i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 3863// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3864// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3865// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3866// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3867// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4 3868// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> 3869// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64> 3870// CHECK: ret <1 x i64> [[TMP2]] 3871int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { 3872 return vrshl_s64(a, b); 3873} 3874 3875// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 3876// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 3877// CHECK: ret <8 x i8> [[VRSHL_V_I]] 3878uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { 3879 return vrshl_u8(a, b); 3880} 3881 3882// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 3883// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3884// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3885// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3886// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3887// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4 3888// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> 3889// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16> 3890// CHECK: ret <4 x i16> [[TMP2]] 3891uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { 3892 return vrshl_u16(a, b); 3893} 3894 3895// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 3896// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x 
i8> 3897// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3898// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3899// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3900// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4 3901// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> 3902// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32> 3903// CHECK: ret <2 x i32> [[TMP2]] 3904uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { 3905 return vrshl_u32(a, b); 3906} 3907 3908// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 3909// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3910// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3911// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3912// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3913// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4 3914// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> 3915// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64> 3916// CHECK: ret <1 x i64> [[TMP2]] 3917uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { 3918 return vrshl_u64(a, b); 3919} 3920 3921// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 3922// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3923// CHECK: ret <16 x i8> [[VRSHLQ_V_I]] 3924int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { 3925 return vrshlq_s8(a, b); 3926} 3927 3928// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 3929// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3930// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3931// CHECK: 
[[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3932// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3933// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4 3934// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> 3935// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16> 3936// CHECK: ret <8 x i16> [[TMP2]] 3937int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { 3938 return vrshlq_s16(a, b); 3939} 3940 3941// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 3942// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3943// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3944// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3945// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3946// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4 3947// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> 3948// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32> 3949// CHECK: ret <4 x i32> [[TMP2]] 3950int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { 3951 return vrshlq_s32(a, b); 3952} 3953 3954// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 3955// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3956// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3957// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3958// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3959// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4 3960// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> 3961// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> 
[[VRSHLQ_V3_I]] to <2 x i64> 3962// CHECK: ret <2 x i64> [[TMP2]] 3963int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { 3964 return vrshlq_s64(a, b); 3965} 3966 3967// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3968// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3969// CHECK: ret <16 x i8> [[VRSHLQ_V_I]] 3970uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { 3971 return vrshlq_u8(a, b); 3972} 3973 3974// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3975// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3976// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3977// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3978// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3979// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4 3980// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> 3981// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16> 3982// CHECK: ret <8 x i16> [[TMP2]] 3983uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { 3984 return vrshlq_u16(a, b); 3985} 3986 3987// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3988// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3989// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3990// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3991// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3992// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4 3993// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> 3994// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32> 3995// CHECK: ret <4 x i32> 
[[TMP2]] 3996uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { 3997 return vrshlq_u32(a, b); 3998} 3999 4000// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 4001// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4002// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4003// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4004// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4005// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4 4006// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> 4007// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64> 4008// CHECK: ret <2 x i64> [[TMP2]] 4009uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { 4010 return vrshlq_u64(a, b); 4011} 4012 4013 4014// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 4015// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4016// CHECK: ret <8 x i8> [[VQRSHL_V_I]] 4017int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { 4018 return vqrshl_s8(a, b); 4019} 4020 4021// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 4022// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4023// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4024// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4025// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4026// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4 4027// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> 4028// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16> 4029// CHECK: ret <4 x i16> [[TMP2]] 4030int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { 4031 
return vqrshl_s16(a, b); 4032} 4033 4034// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4035// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4036// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4037// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4038// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4039// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4 4040// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> 4041// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32> 4042// CHECK: ret <2 x i32> [[TMP2]] 4043int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { 4044 return vqrshl_s32(a, b); 4045} 4046 4047// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 4048// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 4049// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 4050// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 4051// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 4052// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4 4053// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> 4054// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64> 4055// CHECK: ret <1 x i64> [[TMP2]] 4056int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { 4057 return vqrshl_s64(a, b); 4058} 4059 4060// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4061// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4062// CHECK: ret <8 x i8> [[VQRSHL_V_I]] 4063uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { 4064 return vqrshl_u8(a, b); 4065} 4066 4067// CHECK-LABEL: define <4 x i16> 
@test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4068// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4069// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4070// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4071// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4072// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4 4073// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> 4074// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16> 4075// CHECK: ret <4 x i16> [[TMP2]] 4076uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { 4077 return vqrshl_u16(a, b); 4078} 4079 4080// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 4081// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4082// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4083// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4084// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4085// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4 4086// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> 4087// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32> 4088// CHECK: ret <2 x i32> [[TMP2]] 4089uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { 4090 return vqrshl_u32(a, b); 4091} 4092 4093// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 4094// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 4095// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 4096// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 4097// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 4098// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x 
i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4 4099// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> 4100// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64> 4101// CHECK: ret <1 x i64> [[TMP2]] 4102uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { 4103 return vqrshl_u64(a, b); 4104} 4105 4106// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 4107// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4108// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] 4109int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { 4110 return vqrshlq_s8(a, b); 4111} 4112 4113// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 4114// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4115// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4116// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4117// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4118// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4 4119// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> 4120// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16> 4121// CHECK: ret <8 x i16> [[TMP2]] 4122int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { 4123 return vqrshlq_s16(a, b); 4124} 4125 4126// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 4127// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4128// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4129// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4130// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4131// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> 
[[VQRSHLQ_V1_I]]) #4 4132// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> 4133// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32> 4134// CHECK: ret <4 x i32> [[TMP2]] 4135int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { 4136 return vqrshlq_s32(a, b); 4137} 4138 4139// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 4140// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4141// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4142// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4143// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4144// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4 4145// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> 4146// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64> 4147// CHECK: ret <2 x i64> [[TMP2]] 4148int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { 4149 return vqrshlq_s64(a, b); 4150} 4151 4152// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 4153// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4154// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] 4155uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { 4156 return vqrshlq_u8(a, b); 4157} 4158 4159// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 4160// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4161// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4162// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4163// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4164// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4 4165// CHECK: 
[[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> 4166// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16> 4167// CHECK: ret <8 x i16> [[TMP2]] 4168uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { 4169 return vqrshlq_u16(a, b); 4170} 4171 4172// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 4173// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4174// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4175// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4176// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4177// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4 4178// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> 4179// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32> 4180// CHECK: ret <4 x i32> [[TMP2]] 4181uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { 4182 return vqrshlq_u32(a, b); 4183} 4184 4185// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 4186// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4187// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4188// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4189// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4190// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4 4191// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> 4192// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64> 4193// CHECK: ret <2 x i64> [[TMP2]] 4194uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { 4195 return vqrshlq_u64(a, b); 4196} 4197 4198// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 { 
4199// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 4200// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 4201// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 4202// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 4203// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0) 4204// CHECK: ret <1 x i64> [[VSLI_N2]] 4205poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) { 4206 return vsli_n_p64(a, b, 0); 4207} 4208 4209// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 { 4210// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4211// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4212// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4213// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4214// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0) 4215// CHECK: ret <2 x i64> [[VSLI_N2]] 4216poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) { 4217 return vsliq_n_p64(a, b, 0); 4218} 4219 4220// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 { 4221// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4222// CHECK: ret <8 x i8> [[VMAX_I]] 4223int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { 4224 return vmax_s8(a, b); 4225} 4226 4227// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { 4228// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4229// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4230// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4231// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4232// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4 4233// CHECK: ret <4 x i16> [[VMAX2_I]] 
4234int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { 4235 return vmax_s16(a, b); 4236} 4237 4238// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4239// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4240// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4241// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4242// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4243// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4 4244// CHECK: ret <2 x i32> [[VMAX2_I]] 4245int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { 4246 return vmax_s32(a, b); 4247} 4248 4249// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4250// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4251// CHECK: ret <8 x i8> [[VMAX_I]] 4252uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { 4253 return vmax_u8(a, b); 4254} 4255 4256// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4257// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4258// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4259// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4260// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4261// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4 4262// CHECK: ret <4 x i16> [[VMAX2_I]] 4263uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { 4264 return vmax_u16(a, b); 4265} 4266 4267// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { 4268// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4269// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4270// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4271// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4272// CHECK: 
[[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4 4273// CHECK: ret <2 x i32> [[VMAX2_I]] 4274uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { 4275 return vmax_u32(a, b); 4276} 4277 4278// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 { 4279// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4280// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4281// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4282// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4283// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4 4284// CHECK: ret <2 x float> [[VMAX2_I]] 4285float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { 4286 return vmax_f32(a, b); 4287} 4288 4289// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 4290// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4291// CHECK: ret <16 x i8> [[VMAX_I]] 4292int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { 4293 return vmaxq_s8(a, b); 4294} 4295 4296// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 4297// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4298// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4299// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4300// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4301// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4 4302// CHECK: ret <8 x i16> [[VMAX2_I]] 4303int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { 4304 return vmaxq_s16(a, b); 4305} 4306 4307// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 4308// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4309// CHECK: 
// NOTE(review): Auto-generated clang/FileCheck regression tests for the AArch64
// NEON max/min family: vmax(q), vmin(q), vmaxnm(q)/vminnm(q) (NaN-propagating
// "number" variants), the pairwise vpmax(q)/vpmin(q)/vpmaxnm(q)/vpminnm(q),
// and the pairwise-add vpadd intrinsics.
// Each test_* function is a one-line wrapper around the corresponding
// arm_neon.h intrinsic; the "// CHECK:" lines are FileCheck directives matched
// against clang's -emit-llvm output (see the RUN lines at the top of the file).
// The bitcast-to-<N x i8>-and-back sequences in the CHECK patterns reflect how
// clang lowers the generic vector arguments before calling the
// @llvm.aarch64.neon.{smax,umax,fmax,smin,umin,fmin,fmaxnm,fminnm,smaxp,umaxp,
// fmaxp,sminp,uminp,fminp,fmaxnmp,fminnmp,addp}.* intrinsics.
// Do NOT hand-edit the wrappers or the CHECK patterns: they must stay in exact
// sync with generated IR. Regenerate with the update_cc_test_checks tooling if
// codegen changes. Code below is preserved byte-for-byte.
[[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4310// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4311// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4312// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4 4313// CHECK: ret <4 x i32> [[VMAX2_I]] 4314int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { 4315 return vmaxq_s32(a, b); 4316} 4317 4318// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 4319// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4320// CHECK: ret <16 x i8> [[VMAX_I]] 4321uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { 4322 return vmaxq_u8(a, b); 4323} 4324 4325// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 4326// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4327// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4328// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4329// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4330// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4 4331// CHECK: ret <8 x i16> [[VMAX2_I]] 4332uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { 4333 return vmaxq_u16(a, b); 4334} 4335 4336// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 4337// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4338// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4339// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4340// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4341// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4 4342// CHECK: ret <4 x i32> [[VMAX2_I]] 4343uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { 4344 return 
vmaxq_u32(a, b); 4345} 4346 4347// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 { 4348// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4349// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4350// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4351// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4352// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4 4353// CHECK: ret <4 x float> [[VMAX2_I]] 4354float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { 4355 return vmaxq_f32(a, b); 4356} 4357 4358// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 { 4359// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4360// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4361// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4362// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4363// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4 4364// CHECK: ret <2 x double> [[VMAX2_I]] 4365float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { 4366 return vmaxq_f64(a, b); 4367} 4368 4369 4370// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 { 4371// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4372// CHECK: ret <8 x i8> [[VMIN_I]] 4373int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { 4374 return vmin_s8(a, b); 4375} 4376 4377// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 { 4378// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4379// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4380// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4381// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 
4382// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4 4383// CHECK: ret <4 x i16> [[VMIN2_I]] 4384int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { 4385 return vmin_s16(a, b); 4386} 4387 4388// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4389// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4390// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4391// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4392// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4393// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4 4394// CHECK: ret <2 x i32> [[VMIN2_I]] 4395int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { 4396 return vmin_s32(a, b); 4397} 4398 4399// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4400// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4401// CHECK: ret <8 x i8> [[VMIN_I]] 4402uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { 4403 return vmin_u8(a, b); 4404} 4405 4406// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4407// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4408// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4409// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4410// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4411// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4 4412// CHECK: ret <4 x i16> [[VMIN2_I]] 4413uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { 4414 return vmin_u16(a, b); 4415} 4416 4417// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 { 4418// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4419// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to 
<8 x i8> 4420// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4421// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4422// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4 4423// CHECK: ret <2 x i32> [[VMIN2_I]] 4424uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { 4425 return vmin_u32(a, b); 4426} 4427 4428// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 { 4429// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4430// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4431// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4432// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4433// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4 4434// CHECK: ret <2 x float> [[VMIN2_I]] 4435float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { 4436 return vmin_f32(a, b); 4437} 4438 4439// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 4440// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4441// CHECK: ret <16 x i8> [[VMIN_I]] 4442int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { 4443 return vminq_s8(a, b); 4444} 4445 4446// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 4447// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4448// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4449// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4450// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4451// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4 4452// CHECK: ret <8 x i16> [[VMIN2_I]] 4453int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { 4454 return vminq_s16(a, b); 4455} 4456 
4457// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 4458// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4459// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4460// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4461// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4462// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4 4463// CHECK: ret <4 x i32> [[VMIN2_I]] 4464int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { 4465 return vminq_s32(a, b); 4466} 4467 4468// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 4469// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4470// CHECK: ret <16 x i8> [[VMIN_I]] 4471uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { 4472 return vminq_u8(a, b); 4473} 4474 4475// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 4476// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4477// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4478// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4479// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4480// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4 4481// CHECK: ret <8 x i16> [[VMIN2_I]] 4482uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { 4483 return vminq_u16(a, b); 4484} 4485 4486// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 4487// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4488// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4489// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4490// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4491// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4 4492// CHECK: ret <4 x i32> [[VMIN2_I]] 4493uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { 4494 return vminq_u32(a, b); 4495} 4496 4497// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 { 4498// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4499// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4500// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4501// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4502// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4 4503// CHECK: ret <4 x float> [[VMIN2_I]] 4504float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { 4505 return vminq_f32(a, b); 4506} 4507 4508// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 { 4509// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4510// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4511// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4512// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4513// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4 4514// CHECK: ret <2 x double> [[VMIN2_I]] 4515float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { 4516 return vminq_f64(a, b); 4517} 4518 4519// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 { 4520// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4521// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4522// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4523// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4524// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], 
<2 x float> [[VMAXNM1_I]]) #4 4525// CHECK: ret <2 x float> [[VMAXNM2_I]] 4526float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { 4527 return vmaxnm_f32(a, b); 4528} 4529 4530// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { 4531// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4532// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4533// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4534// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4535// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4 4536// CHECK: ret <4 x float> [[VMAXNM2_I]] 4537float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { 4538 return vmaxnmq_f32(a, b); 4539} 4540 4541// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 { 4542// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4543// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4544// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4545// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4546// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4 4547// CHECK: ret <2 x double> [[VMAXNM2_I]] 4548float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { 4549 return vmaxnmq_f64(a, b); 4550} 4551 4552// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 { 4553// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4554// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4555// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4556// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4557// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], 
<2 x float> [[VMINNM1_I]]) #4 4558// CHECK: ret <2 x float> [[VMINNM2_I]] 4559float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { 4560 return vminnm_f32(a, b); 4561} 4562 4563// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { 4564// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4565// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4566// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4567// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4568// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4 4569// CHECK: ret <4 x float> [[VMINNM2_I]] 4570float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { 4571 return vminnmq_f32(a, b); 4572} 4573 4574// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 { 4575// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4576// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4577// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4578// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4579// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4 4580// CHECK: ret <2 x double> [[VMINNM2_I]] 4581float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) { 4582 return vminnmq_f64(a, b); 4583} 4584 4585// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 { 4586// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4587// CHECK: ret <8 x i8> [[VPMAX_I]] 4588int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { 4589 return vpmax_s8(a, b); 4590} 4591 4592// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { 4593// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4594// CHECK: 
[[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4595// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4596// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4597// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4 4598// CHECK: ret <4 x i16> [[VPMAX2_I]] 4599int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { 4600 return vpmax_s16(a, b); 4601} 4602 4603// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4604// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4605// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4606// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4607// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4608// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4 4609// CHECK: ret <2 x i32> [[VPMAX2_I]] 4610int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { 4611 return vpmax_s32(a, b); 4612} 4613 4614// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4615// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4616// CHECK: ret <8 x i8> [[VPMAX_I]] 4617uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { 4618 return vpmax_u8(a, b); 4619} 4620 4621// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4622// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4623// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4624// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4625// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4626// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4 4627// CHECK: ret <4 x i16> [[VPMAX2_I]] 4628uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { 4629 return 
vpmax_u16(a, b); 4630} 4631 4632// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { 4633// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4634// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4635// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4636// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4637// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4 4638// CHECK: ret <2 x i32> [[VPMAX2_I]] 4639uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { 4640 return vpmax_u32(a, b); 4641} 4642 4643// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 { 4644// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4645// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4646// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4647// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4648// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4 4649// CHECK: ret <2 x float> [[VPMAX2_I]] 4650float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { 4651 return vpmax_f32(a, b); 4652} 4653 4654// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 4655// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4656// CHECK: ret <16 x i8> [[VPMAX_I]] 4657int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { 4658 return vpmaxq_s8(a, b); 4659} 4660 4661// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 4662// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4663// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4664// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4665// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4666// 
CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4 4667// CHECK: ret <8 x i16> [[VPMAX2_I]] 4668int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { 4669 return vpmaxq_s16(a, b); 4670} 4671 4672// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 4673// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4674// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4675// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4676// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4677// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4 4678// CHECK: ret <4 x i32> [[VPMAX2_I]] 4679int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { 4680 return vpmaxq_s32(a, b); 4681} 4682 4683// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 4684// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4685// CHECK: ret <16 x i8> [[VPMAX_I]] 4686uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { 4687 return vpmaxq_u8(a, b); 4688} 4689 4690// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 4691// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4692// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4693// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4694// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4695// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4 4696// CHECK: ret <8 x i16> [[VPMAX2_I]] 4697uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { 4698 return vpmaxq_u16(a, b); 4699} 4700 4701// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 4702// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x 
i8> 4703// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4704// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4705// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4706// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4 4707// CHECK: ret <4 x i32> [[VPMAX2_I]] 4708uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { 4709 return vpmaxq_u32(a, b); 4710} 4711 4712// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 { 4713// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4714// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4715// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4716// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4717// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4 4718// CHECK: ret <4 x float> [[VPMAX2_I]] 4719float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { 4720 return vpmaxq_f32(a, b); 4721} 4722 4723// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 { 4724// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4725// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4726// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4727// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4728// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4 4729// CHECK: ret <2 x double> [[VPMAX2_I]] 4730float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { 4731 return vpmaxq_f64(a, b); 4732} 4733 4734// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 { 4735// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4736// 
CHECK: ret <8 x i8> [[VPMIN_I]] 4737int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { 4738 return vpmin_s8(a, b); 4739} 4740 4741// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 { 4742// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4743// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4744// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4745// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4746// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4 4747// CHECK: ret <4 x i16> [[VPMIN2_I]] 4748int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { 4749 return vpmin_s16(a, b); 4750} 4751 4752// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4753// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4754// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4755// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4756// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4757// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4 4758// CHECK: ret <2 x i32> [[VPMIN2_I]] 4759int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { 4760 return vpmin_s32(a, b); 4761} 4762 4763// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4764// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4765// CHECK: ret <8 x i8> [[VPMIN_I]] 4766uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { 4767 return vpmin_u8(a, b); 4768} 4769 4770// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4771// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4772// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4773// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4774// CHECK: [[VPMIN1_I:%.*]] = 
bitcast <8 x i8> [[TMP1]] to <4 x i16> 4775// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4 4776// CHECK: ret <4 x i16> [[VPMIN2_I]] 4777uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { 4778 return vpmin_u16(a, b); 4779} 4780 4781// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 { 4782// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4783// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4784// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4785// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4786// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4 4787// CHECK: ret <2 x i32> [[VPMIN2_I]] 4788uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { 4789 return vpmin_u32(a, b); 4790} 4791 4792// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 { 4793// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4794// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4795// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4796// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4797// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4 4798// CHECK: ret <2 x float> [[VPMIN2_I]] 4799float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { 4800 return vpmin_f32(a, b); 4801} 4802 4803// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 4804// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4805// CHECK: ret <16 x i8> [[VPMIN_I]] 4806int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { 4807 return vpminq_s8(a, b); 4808} 4809 4810// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 
4811// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4812// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4813// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4814// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4815// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4 4816// CHECK: ret <8 x i16> [[VPMIN2_I]] 4817int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { 4818 return vpminq_s16(a, b); 4819} 4820 4821// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 4822// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4823// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4824// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4825// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4826// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4 4827// CHECK: ret <4 x i32> [[VPMIN2_I]] 4828int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { 4829 return vpminq_s32(a, b); 4830} 4831 4832// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 4833// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 4834// CHECK: ret <16 x i8> [[VPMIN_I]] 4835uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { 4836 return vpminq_u8(a, b); 4837} 4838 4839// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 4840// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4841// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4842// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4843// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4844// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4 4845// 
CHECK: ret <8 x i16> [[VPMIN2_I]] 4846uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { 4847 return vpminq_u16(a, b); 4848} 4849 4850// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 4851// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4852// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4853// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4854// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4855// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4 4856// CHECK: ret <4 x i32> [[VPMIN2_I]] 4857uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { 4858 return vpminq_u32(a, b); 4859} 4860 4861// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 { 4862// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4863// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4864// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4865// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4866// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4 4867// CHECK: ret <4 x float> [[VPMIN2_I]] 4868float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { 4869 return vpminq_f32(a, b); 4870} 4871 4872// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 { 4873// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4874// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4875// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4876// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4877// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4 4878// CHECK: ret <2 x double> [[VPMIN2_I]] 4879float64x2_t 
test_vpminq_f64(float64x2_t a, float64x2_t b) { 4880 return vpminq_f64(a, b); 4881} 4882 4883// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 { 4884// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4885// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4886// CHECK: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4887// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4888// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4 4889// CHECK: ret <2 x float> [[VPMAXNM2_I]] 4890float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { 4891 return vpmaxnm_f32(a, b); 4892} 4893 4894// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 { 4895// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4896// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4897// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4898// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4899// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4 4900// CHECK: ret <4 x float> [[VPMAXNM2_I]] 4901float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { 4902 return vpmaxnmq_f32(a, b); 4903} 4904 4905// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 { 4906// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4907// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4908// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4909// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4910// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4 4911// CHECK: ret <2 x double> 
[[VPMAXNM2_I]] 4912float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { 4913 return vpmaxnmq_f64(a, b); 4914} 4915 4916// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 { 4917// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4918// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4919// CHECK: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 4920// CHECK: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 4921// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4 4922// CHECK: ret <2 x float> [[VPMINNM2_I]] 4923float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { 4924 return vpminnm_f32(a, b); 4925} 4926 4927// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 { 4928// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4929// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4930// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 4931// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 4932// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4 4933// CHECK: ret <4 x float> [[VPMINNM2_I]] 4934float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { 4935 return vpminnmq_f32(a, b); 4936} 4937 4938// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 { 4939// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4940// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4941// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 4942// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 4943// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4 
4944// CHECK: ret <2 x double> [[VPMINNM2_I]] 4945float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { 4946 return vpminnmq_f64(a, b); 4947} 4948 4949// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { 4950// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4951// CHECK: ret <8 x i8> [[VPADD_V_I]] 4952int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { 4953 return vpadd_s8(a, b); 4954} 4955 4956// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { 4957// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4958// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4959// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4960// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4961// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 4962// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> 4963// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> 4964// CHECK: ret <4 x i16> [[TMP2]] 4965int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { 4966 return vpadd_s16(a, b); 4967} 4968 4969// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { 4970// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4971// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4972// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4973// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4974// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 4975// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> 4976// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> 4977// CHECK: ret <2 x i32> [[TMP2]] 4978int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { 4979 
return vpadd_s32(a, b); 4980} 4981 4982// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { 4983// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 4984// CHECK: ret <8 x i8> [[VPADD_V_I]] 4985uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { 4986 return vpadd_u8(a, b); 4987} 4988 4989// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { 4990// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4991// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4992// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4993// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4994// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 4995// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> 4996// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> 4997// CHECK: ret <4 x i16> [[TMP2]] 4998uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { 4999 return vpadd_u16(a, b); 5000} 5001 5002// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { 5003// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5004// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5005// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5006// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5007// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 5008// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> 5009// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> 5010// CHECK: ret <2 x i32> [[TMP2]] 5011uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { 5012 return vpadd_u32(a, b); 5013} 5014 5015// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x 
float> %b) #0 { 5016// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 5017// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 5018// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 5019// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 5020// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4 5021// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> 5022// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float> 5023// CHECK: ret <2 x float> [[TMP2]] 5024float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { 5025 return vpadd_f32(a, b); 5026} 5027 5028// CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 5029// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 5030// CHECK: ret <16 x i8> [[VPADDQ_V_I]] 5031int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { 5032 return vpaddq_s8(a, b); 5033} 5034 5035// CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 5036// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5037// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5038// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5039// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5040// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4 5041// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> 5042// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16> 5043// CHECK: ret <8 x i16> [[TMP2]] 5044int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { 5045 return vpaddq_s16(a, b); 5046} 5047 5048// CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 5049// CHECK: [[TMP0:%.*]] = bitcast <4 x 
i32> %a to <16 x i8> 5050// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5051// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5052// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5053// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4 5054// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> 5055// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32> 5056// CHECK: ret <4 x i32> [[TMP2]] 5057int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { 5058 return vpaddq_s32(a, b); 5059} 5060 5061// CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 5062// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 5063// CHECK: ret <16 x i8> [[VPADDQ_V_I]] 5064uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { 5065 return vpaddq_u8(a, b); 5066} 5067 5068// CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 5069// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5070// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5071// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5072// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5073// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4 5074// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> 5075// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16> 5076// CHECK: ret <8 x i16> [[TMP2]] 5077uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { 5078 return vpaddq_u16(a, b); 5079} 5080 5081// CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 5082// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5083// CHECK: [[TMP1:%.*]] = bitcast <4 x 
i32> %b to <16 x i8> 5084// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5085// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5086// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4 5087// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> 5088// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32> 5089// CHECK: ret <4 x i32> [[TMP2]] 5090uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { 5091 return vpaddq_u32(a, b); 5092} 5093 5094// CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 { 5095// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 5096// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 5097// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 5098// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 5099// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4 5100// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> 5101// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float> 5102// CHECK: ret <4 x float> [[TMP2]] 5103float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { 5104 return vpaddq_f32(a, b); 5105} 5106 5107// CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 { 5108// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 5109// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 5110// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 5111// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 5112// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4 5113// CHECK: [[VPADDQ_V3_I:%.*]] 
= bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> 5114// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double> 5115// CHECK: ret <2 x double> [[TMP2]] 5116float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { 5117 return vpaddq_f64(a, b); 5118} 5119 5120// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { 5121// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5122// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5123// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5124// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5125// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 5126// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 5127// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 5128// CHECK: ret <4 x i16> [[TMP2]] 5129int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { 5130 return vqdmulh_s16(a, b); 5131} 5132 5133// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { 5134// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5135// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5136// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5137// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5138// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 5139// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 5140// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 5141// CHECK: ret <2 x i32> [[TMP2]] 5142int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { 5143 return vqdmulh_s32(a, b); 5144} 5145 5146// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 5147// CHECK: [[TMP0:%.*]] 
= bitcast <8 x i16> %a to <16 x i8> 5148// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5149// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5150// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5151// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 5152// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 5153// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 5154// CHECK: ret <8 x i16> [[TMP2]] 5155int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { 5156 return vqdmulhq_s16(a, b); 5157} 5158 5159// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 5160// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5161// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5162// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5163// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5164// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 5165// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 5166// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 5167// CHECK: ret <4 x i32> [[TMP2]] 5168int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { 5169 return vqdmulhq_s32(a, b); 5170} 5171 5172// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { 5173// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5174// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5175// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5176// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5177// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> 
[[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4 5178// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 5179// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 5180// CHECK: ret <4 x i16> [[TMP2]] 5181int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { 5182 return vqrdmulh_s16(a, b); 5183} 5184 5185// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { 5186// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5187// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5188// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5189// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5190// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4 5191// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 5192// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> 5193// CHECK: ret <2 x i32> [[TMP2]] 5194int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { 5195 return vqrdmulh_s32(a, b); 5196} 5197 5198// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 5199// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5200// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5201// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5202// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5203// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4 5204// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 5205// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 5206// CHECK: ret <8 x i16> [[TMP2]] 5207int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { 5208 return vqrdmulhq_s16(a, b); 5209} 
5210 5211// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 5212// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5213// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5214// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5215// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5216// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4 5217// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 5218// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 5219// CHECK: ret <4 x i32> [[TMP2]] 5220int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { 5221 return vqrdmulhq_s32(a, b); 5222} 5223 5224// CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 { 5225// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 5226// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 5227// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 5228// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 5229// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4 5230// CHECK: ret <2 x float> [[VMULX2_I]] 5231float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { 5232 return vmulx_f32(a, b); 5233} 5234 5235// CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 { 5236// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 5237// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 5238// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 5239// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 5240// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4 5241// 
CHECK: ret <4 x float> [[VMULX2_I]] 5242float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { 5243 return vmulxq_f32(a, b); 5244} 5245 5246// CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 { 5247// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 5248// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 5249// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 5250// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 5251// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4 5252// CHECK: ret <2 x double> [[VMULX2_I]] 5253float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { 5254 return vmulxq_f64(a, b); 5255} 5256 5257// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 { 5258// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5259// CHECK: ret <8 x i8> [[VSHL_N]] 5260int8x8_t test_vshl_n_s8(int8x8_t a) { 5261 return vshl_n_s8(a, 3); 5262} 5263 5264// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 { 5265// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5266// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5267// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3> 5268// CHECK: ret <4 x i16> [[VSHL_N]] 5269int16x4_t test_vshl_n_s16(int16x4_t a) { 5270 return vshl_n_s16(a, 3); 5271} 5272 5273// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 { 5274// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5275// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5276// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3> 5277// CHECK: ret <2 x i32> [[VSHL_N]] 5278int32x2_t test_vshl_n_s32(int32x2_t a) { 5279 return vshl_n_s32(a, 3); 5280} 5281 5282// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 { 5283// CHECK: 
[[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5284// CHECK: ret <16 x i8> [[VSHL_N]] 5285int8x16_t test_vshlq_n_s8(int8x16_t a) { 5286 return vshlq_n_s8(a, 3); 5287} 5288 5289// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 { 5290// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5291// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5292// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5293// CHECK: ret <8 x i16> [[VSHL_N]] 5294int16x8_t test_vshlq_n_s16(int16x8_t a) { 5295 return vshlq_n_s16(a, 3); 5296} 5297 5298// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 { 5299// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5300// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5301// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3> 5302// CHECK: ret <4 x i32> [[VSHL_N]] 5303int32x4_t test_vshlq_n_s32(int32x4_t a) { 5304 return vshlq_n_s32(a, 3); 5305} 5306 5307// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 { 5308// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5309// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5310// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3> 5311// CHECK: ret <2 x i64> [[VSHL_N]] 5312int64x2_t test_vshlq_n_s64(int64x2_t a) { 5313 return vshlq_n_s64(a, 3); 5314} 5315 5316// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 { 5317// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5318// CHECK: ret <8 x i8> [[VSHL_N]] 5319int8x8_t test_vshl_n_u8(int8x8_t a) { 5320 return vshl_n_u8(a, 3); 5321} 5322 5323// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 { 5324// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5325// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] 
to <4 x i16> 5326// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3> 5327// CHECK: ret <4 x i16> [[VSHL_N]] 5328int16x4_t test_vshl_n_u16(int16x4_t a) { 5329 return vshl_n_u16(a, 3); 5330} 5331 5332// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 { 5333// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5334// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5335// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3> 5336// CHECK: ret <2 x i32> [[VSHL_N]] 5337int32x2_t test_vshl_n_u32(int32x2_t a) { 5338 return vshl_n_u32(a, 3); 5339} 5340 5341// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 { 5342// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5343// CHECK: ret <16 x i8> [[VSHL_N]] 5344int8x16_t test_vshlq_n_u8(int8x16_t a) { 5345 return vshlq_n_u8(a, 3); 5346} 5347 5348// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 { 5349// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5350// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5351// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5352// CHECK: ret <8 x i16> [[VSHL_N]] 5353int16x8_t test_vshlq_n_u16(int16x8_t a) { 5354 return vshlq_n_u16(a, 3); 5355} 5356 5357// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 { 5358// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5359// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5360// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3> 5361// CHECK: ret <4 x i32> [[VSHL_N]] 5362int32x4_t test_vshlq_n_u32(int32x4_t a) { 5363 return vshlq_n_u32(a, 3); 5364} 5365 5366// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 { 5367// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5368// CHECK: [[TMP1:%.*]] = bitcast 
<16 x i8> [[TMP0]] to <2 x i64> 5369// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3> 5370// CHECK: ret <2 x i64> [[VSHL_N]] 5371int64x2_t test_vshlq_n_u64(int64x2_t a) { 5372 return vshlq_n_u64(a, 3); 5373} 5374 5375// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 { 5376// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5377// CHECK: ret <8 x i8> [[VSHR_N]] 5378int8x8_t test_vshr_n_s8(int8x8_t a) { 5379 return vshr_n_s8(a, 3); 5380} 5381 5382// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 { 5383// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5384// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5385// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3> 5386// CHECK: ret <4 x i16> [[VSHR_N]] 5387int16x4_t test_vshr_n_s16(int16x4_t a) { 5388 return vshr_n_s16(a, 3); 5389} 5390 5391// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 { 5392// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5393// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5394// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3> 5395// CHECK: ret <2 x i32> [[VSHR_N]] 5396int32x2_t test_vshr_n_s32(int32x2_t a) { 5397 return vshr_n_s32(a, 3); 5398} 5399 5400// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 { 5401// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5402// CHECK: ret <16 x i8> [[VSHR_N]] 5403int8x16_t test_vshrq_n_s8(int8x16_t a) { 5404 return vshrq_n_s8(a, 3); 5405} 5406 5407// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 { 5408// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5409// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5410// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5411// 
CHECK: ret <8 x i16> [[VSHR_N]] 5412int16x8_t test_vshrq_n_s16(int16x8_t a) { 5413 return vshrq_n_s16(a, 3); 5414} 5415 5416// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 { 5417// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5418// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5419// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3> 5420// CHECK: ret <4 x i32> [[VSHR_N]] 5421int32x4_t test_vshrq_n_s32(int32x4_t a) { 5422 return vshrq_n_s32(a, 3); 5423} 5424 5425// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 { 5426// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5427// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5428// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3> 5429// CHECK: ret <2 x i64> [[VSHR_N]] 5430int64x2_t test_vshrq_n_s64(int64x2_t a) { 5431 return vshrq_n_s64(a, 3); 5432} 5433 5434// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 { 5435// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5436// CHECK: ret <8 x i8> [[VSHR_N]] 5437int8x8_t test_vshr_n_u8(int8x8_t a) { 5438 return vshr_n_u8(a, 3); 5439} 5440 5441// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 { 5442// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5443// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5444// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3> 5445// CHECK: ret <4 x i16> [[VSHR_N]] 5446int16x4_t test_vshr_n_u16(int16x4_t a) { 5447 return vshr_n_u16(a, 3); 5448} 5449 5450// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 { 5451// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5452// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5453// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3> 5454// CHECK: ret <2 x i32> [[VSHR_N]] 5455int32x2_t test_vshr_n_u32(int32x2_t a) 
{ 5456 return vshr_n_u32(a, 3); 5457} 5458 5459// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 { 5460// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5461// CHECK: ret <16 x i8> [[VSHR_N]] 5462int8x16_t test_vshrq_n_u8(int8x16_t a) { 5463 return vshrq_n_u8(a, 3); 5464} 5465 5466// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 { 5467// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5468// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5469// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5470// CHECK: ret <8 x i16> [[VSHR_N]] 5471int16x8_t test_vshrq_n_u16(int16x8_t a) { 5472 return vshrq_n_u16(a, 3); 5473} 5474 5475// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 { 5476// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5477// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5478// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3> 5479// CHECK: ret <4 x i32> [[VSHR_N]] 5480int32x4_t test_vshrq_n_u32(int32x4_t a) { 5481 return vshrq_n_u32(a, 3); 5482} 5483 5484// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 { 5485// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5486// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5487// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3> 5488// CHECK: ret <2 x i64> [[VSHR_N]] 5489int64x2_t test_vshrq_n_u64(int64x2_t a) { 5490 return vshrq_n_u64(a, 3); 5491} 5492 5493// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 5494// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5495// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] 5496// CHECK: ret <8 x i8> [[TMP0]] 5497int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { 5498 return 
vsra_n_s8(a, b, 3); 5499} 5500 5501// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 5502// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5503// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5504// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5505// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5506// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3> 5507// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] 5508// CHECK: ret <4 x i16> [[TMP4]] 5509int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { 5510 return vsra_n_s16(a, b, 3); 5511} 5512 5513// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { 5514// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5515// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5516// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5517// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5518// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3> 5519// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] 5520// CHECK: ret <2 x i32> [[TMP4]] 5521int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { 5522 return vsra_n_s32(a, b, 3); 5523} 5524 5525// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { 5526// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5527// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] 5528// CHECK: ret <16 x i8> [[TMP0]] 5529int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { 5530 return vsraq_n_s8(a, b, 3); 5531} 5532 5533// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { 5534// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5535// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5536// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x 
i16> 5537// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5538// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5539// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] 5540// CHECK: ret <8 x i16> [[TMP4]] 5541int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { 5542 return vsraq_n_s16(a, b, 3); 5543} 5544 5545// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { 5546// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5547// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5548// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5549// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5550// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3> 5551// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] 5552// CHECK: ret <4 x i32> [[TMP4]] 5553int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { 5554 return vsraq_n_s32(a, b, 3); 5555} 5556 5557// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { 5558// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5559// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5560// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5561// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5562// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3> 5563// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] 5564// CHECK: ret <2 x i64> [[TMP4]] 5565int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { 5566 return vsraq_n_s64(a, b, 3); 5567} 5568 5569// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { 5570// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5571// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] 5572// CHECK: ret <8 x i8> [[TMP0]] 5573int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) { 5574 
return vsra_n_u8(a, b, 3); 5575} 5576 5577// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { 5578// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5579// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5580// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5581// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5582// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3> 5583// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] 5584// CHECK: ret <4 x i16> [[TMP4]] 5585int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) { 5586 return vsra_n_u16(a, b, 3); 5587} 5588 5589// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { 5590// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5591// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5592// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5593// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5594// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3> 5595// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] 5596// CHECK: ret <2 x i32> [[TMP4]] 5597int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) { 5598 return vsra_n_u32(a, b, 3); 5599} 5600 5601// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { 5602// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5603// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] 5604// CHECK: ret <16 x i8> [[TMP0]] 5605int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) { 5606 return vsraq_n_u8(a, b, 3); 5607} 5608 5609// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { 5610// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5611// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5612// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to 
<8 x i16> 5613// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5614// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5615// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] 5616// CHECK: ret <8 x i16> [[TMP4]] 5617int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) { 5618 return vsraq_n_u16(a, b, 3); 5619} 5620 5621// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { 5622// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5623// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5624// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5625// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5626// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3> 5627// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] 5628// CHECK: ret <4 x i32> [[TMP4]] 5629int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) { 5630 return vsraq_n_u32(a, b, 3); 5631} 5632 5633// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { 5634// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5635// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5636// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5637// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5638// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3> 5639// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] 5640// CHECK: ret <2 x i64> [[TMP4]] 5641int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) { 5642 return vsraq_n_u64(a, b, 3); 5643} 5644 5645// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 { 5646// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5647// CHECK: ret <8 x i8> [[VRSHR_N]] 5648int8x8_t test_vrshr_n_s8(int8x8_t a) { 5649 return 
vrshr_n_s8(a, 3); 5650} 5651 5652// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 { 5653// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5654// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5655// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>) 5656// CHECK: ret <4 x i16> [[VRSHR_N1]] 5657int16x4_t test_vrshr_n_s16(int16x4_t a) { 5658 return vrshr_n_s16(a, 3); 5659} 5660 5661// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 { 5662// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5663// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5664// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>) 5665// CHECK: ret <2 x i32> [[VRSHR_N1]] 5666int32x2_t test_vrshr_n_s32(int32x2_t a) { 5667 return vrshr_n_s32(a, 3); 5668} 5669 5670// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 { 5671// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5672// CHECK: ret <16 x i8> [[VRSHR_N]] 5673int8x16_t test_vrshrq_n_s8(int8x16_t a) { 5674 return vrshrq_n_s8(a, 3); 5675} 5676 5677// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 { 5678// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5679// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5680// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>) 5681// CHECK: ret <8 x i16> [[VRSHR_N1]] 5682int16x8_t test_vrshrq_n_s16(int16x8_t a) { 5683 return vrshrq_n_s16(a, 3); 5684} 5685 5686// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 { 5687// 
CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5688// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5689// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>) 5690// CHECK: ret <4 x i32> [[VRSHR_N1]] 5691int32x4_t test_vrshrq_n_s32(int32x4_t a) { 5692 return vrshrq_n_s32(a, 3); 5693} 5694 5695// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 { 5696// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5697// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5698// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>) 5699// CHECK: ret <2 x i64> [[VRSHR_N1]] 5700int64x2_t test_vrshrq_n_s64(int64x2_t a) { 5701 return vrshrq_n_s64(a, 3); 5702} 5703 5704// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 { 5705// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5706// CHECK: ret <8 x i8> [[VRSHR_N]] 5707int8x8_t test_vrshr_n_u8(int8x8_t a) { 5708 return vrshr_n_u8(a, 3); 5709} 5710 5711// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 { 5712// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5713// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5714// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>) 5715// CHECK: ret <4 x i16> [[VRSHR_N1]] 5716int16x4_t test_vrshr_n_u16(int16x4_t a) { 5717 return vrshr_n_u16(a, 3); 5718} 5719 5720// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 { 5721// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5722// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5723// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x 
i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>) 5724// CHECK: ret <2 x i32> [[VRSHR_N1]] 5725int32x2_t test_vrshr_n_u32(int32x2_t a) { 5726 return vrshr_n_u32(a, 3); 5727} 5728 5729// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 { 5730// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5731// CHECK: ret <16 x i8> [[VRSHR_N]] 5732int8x16_t test_vrshrq_n_u8(int8x16_t a) { 5733 return vrshrq_n_u8(a, 3); 5734} 5735 5736// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 { 5737// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5738// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5739// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>) 5740// CHECK: ret <8 x i16> [[VRSHR_N1]] 5741int16x8_t test_vrshrq_n_u16(int16x8_t a) { 5742 return vrshrq_n_u16(a, 3); 5743} 5744 5745// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 { 5746// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5747// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5748// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>) 5749// CHECK: ret <4 x i32> [[VRSHR_N1]] 5750int32x4_t test_vrshrq_n_u32(int32x4_t a) { 5751 return vrshrq_n_u32(a, 3); 5752} 5753 5754// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 { 5755// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5756// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5757// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>) 5758// CHECK: ret <2 x i64> [[VRSHR_N1]] 5759int64x2_t 
test_vrshrq_n_u64(int64x2_t a) { 5760 return vrshrq_n_u64(a, 3); 5761} 5762 5763// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 5764// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5765// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] 5766// CHECK: ret <8 x i8> [[TMP0]] 5767int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { 5768 return vrsra_n_s8(a, b, 3); 5769} 5770 5771// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 5772// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5773// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5774// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5775// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>) 5776// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5777// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] 5778// CHECK: ret <4 x i16> [[TMP3]] 5779int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { 5780 return vrsra_n_s16(a, b, 3); 5781} 5782 5783// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { 5784// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5785// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5786// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5787// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>) 5788// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5789// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] 5790// CHECK: ret <2 x i32> [[TMP3]] 5791int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { 5792 return vrsra_n_s32(a, b, 3); 5793} 5794 5795// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { 5796// 
CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5797// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] 5798// CHECK: ret <16 x i8> [[TMP0]] 5799int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { 5800 return vrsraq_n_s8(a, b, 3); 5801} 5802 5803// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { 5804// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5805// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5806// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5807// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>) 5808// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5809// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] 5810// CHECK: ret <8 x i16> [[TMP3]] 5811int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { 5812 return vrsraq_n_s16(a, b, 3); 5813} 5814 5815// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { 5816// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5817// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5818// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5819// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>) 5820// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5821// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] 5822// CHECK: ret <4 x i32> [[TMP3]] 5823int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { 5824 return vrsraq_n_s32(a, b, 3); 5825} 5826 5827// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { 5828// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> 
%a to <16 x i8> 5829// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5830// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5831// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>) 5832// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5833// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] 5834// CHECK: ret <2 x i64> [[TMP3]] 5835int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { 5836 return vrsraq_n_s64(a, b, 3); 5837} 5838 5839// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { 5840// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5841// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] 5842// CHECK: ret <8 x i8> [[TMP0]] 5843int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) { 5844 return vrsra_n_u8(a, b, 3); 5845} 5846 5847// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { 5848// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5849// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5850// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5851// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>) 5852// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5853// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] 5854// CHECK: ret <4 x i16> [[TMP3]] 5855int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) { 5856 return vrsra_n_u16(a, b, 3); 5857} 5858 5859// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { 5860// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5861// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5862// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5863// CHECK: 
[[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>) 5864// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5865// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] 5866// CHECK: ret <2 x i32> [[TMP3]] 5867int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) { 5868 return vrsra_n_u32(a, b, 3); 5869} 5870 5871// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { 5872// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>) 5873// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] 5874// CHECK: ret <16 x i8> [[TMP0]] 5875int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) { 5876 return vrsraq_n_u8(a, b, 3); 5877} 5878 5879// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { 5880// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5881// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5882// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5883// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>) 5884// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5885// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] 5886// CHECK: ret <8 x i16> [[TMP3]] 5887int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) { 5888 return vrsraq_n_u16(a, b, 3); 5889} 5890 5891// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { 5892// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5893// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5894// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5895// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>) 5896// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5897// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] 5898// CHECK: ret <4 x i32> [[TMP3]] 5899int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) { 5900 return vrsraq_n_u32(a, b, 3); 5901} 5902 5903// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { 5904// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5905// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5906// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5907// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>) 5908// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5909// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] 5910// CHECK: ret <2 x i64> [[TMP3]] 5911int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) { 5912 return vrsraq_n_u64(a, b, 3); 5913} 5914 5915// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 5916// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5917// CHECK: ret <8 x i8> [[VSRI_N]] 5918int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { 5919 return vsri_n_s8(a, b, 3); 5920} 5921 5922// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 5923// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5924// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5925// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5926// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5927// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) 5928// CHECK: ret <4 x i16> [[VSRI_N2]] 5929int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { 5930 return 
vsri_n_s16(a, b, 3); 5931} 5932 5933// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { 5934// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5935// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5936// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5937// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5938// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) 5939// CHECK: ret <2 x i32> [[VSRI_N2]] 5940int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { 5941 return vsri_n_s32(a, b, 3); 5942} 5943 5944// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { 5945// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5946// CHECK: ret <16 x i8> [[VSRI_N]] 5947int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { 5948 return vsriq_n_s8(a, b, 3); 5949} 5950 5951// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { 5952// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5953// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5954// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5955// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5956// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) 5957// CHECK: ret <8 x i16> [[VSRI_N2]] 5958int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { 5959 return vsriq_n_s16(a, b, 3); 5960} 5961 5962// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { 5963// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5964// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5965// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5966// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5967// CHECK: 
[[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) 5968// CHECK: ret <4 x i32> [[VSRI_N2]] 5969int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { 5970 return vsriq_n_s32(a, b, 3); 5971} 5972 5973// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { 5974// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5975// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5976// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5977// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5978// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) 5979// CHECK: ret <2 x i64> [[VSRI_N2]] 5980int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { 5981 return vsriq_n_s64(a, b, 3); 5982} 5983 5984// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { 5985// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5986// CHECK: ret <8 x i8> [[VSRI_N]] 5987int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) { 5988 return vsri_n_u8(a, b, 3); 5989} 5990 5991// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { 5992// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5993// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5994// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5995// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5996// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) 5997// CHECK: ret <4 x i16> [[VSRI_N2]] 5998int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) { 5999 return vsri_n_u16(a, b, 3); 6000} 6001 6002// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { 6003// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6004// CHECK: 
[[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6005// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6006// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 6007// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) 6008// CHECK: ret <2 x i32> [[VSRI_N2]] 6009int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) { 6010 return vsri_n_u32(a, b, 3); 6011} 6012 6013// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { 6014// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 6015// CHECK: ret <16 x i8> [[VSRI_N]] 6016int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) { 6017 return vsriq_n_u8(a, b, 3); 6018} 6019 6020// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { 6021// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6022// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6023// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6024// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 6025// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) 6026// CHECK: ret <8 x i16> [[VSRI_N2]] 6027int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) { 6028 return vsriq_n_u16(a, b, 3); 6029} 6030 6031// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { 6032// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6033// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6034// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6035// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 6036// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) 6037// CHECK: ret <4 x i32> [[VSRI_N2]] 6038int32x4_t test_vsriq_n_u32(int32x4_t a, 
int32x4_t b) { 6039 return vsriq_n_u32(a, b, 3); 6040} 6041 6042// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { 6043// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6044// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6045// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6046// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 6047// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) 6048// CHECK: ret <2 x i64> [[VSRI_N2]] 6049int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) { 6050 return vsriq_n_u64(a, b, 3); 6051} 6052 6053// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 { 6054// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 6055// CHECK: ret <8 x i8> [[VSRI_N]] 6056poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { 6057 return vsri_n_p8(a, b, 3); 6058} 6059 6060// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 { 6061// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6062// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6063// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6064// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 6065// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15) 6066// CHECK: ret <4 x i16> [[VSRI_N2]] 6067poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { 6068 return vsri_n_p16(a, b, 15); 6069} 6070 6071// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 { 6072// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 6073// CHECK: ret <16 x i8> [[VSRI_N]] 6074poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { 6075 return vsriq_n_p8(a, b, 3); 6076} 6077 6078// 
CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 { 6079// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6080// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6081// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6082// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 6083// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15) 6084// CHECK: ret <8 x i16> [[VSRI_N2]] 6085poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { 6086 return vsriq_n_p16(a, b, 15); 6087} 6088 6089// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 6090// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 6091// CHECK: ret <8 x i8> [[VSLI_N]] 6092int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { 6093 return vsli_n_s8(a, b, 3); 6094} 6095 6096// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 6097// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6098// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6099// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6100// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 6101// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3) 6102// CHECK: ret <4 x i16> [[VSLI_N2]] 6103int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { 6104 return vsli_n_s16(a, b, 3); 6105} 6106 6107// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { 6108// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6109// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6110// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6111// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 6112// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3) 6113// CHECK: ret <2 x i32> [[VSLI_N2]] 6114int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { 6115 return vsli_n_s32(a, b, 3); 6116} 6117 6118// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { 6119// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 6120// CHECK: ret <16 x i8> [[VSLI_N]] 6121int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { 6122 return vsliq_n_s8(a, b, 3); 6123} 6124 6125// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { 6126// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6127// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6128// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6129// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 6130// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3) 6131// CHECK: ret <8 x i16> [[VSLI_N2]] 6132int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { 6133 return vsliq_n_s16(a, b, 3); 6134} 6135 6136// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { 6137// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6138// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6139// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6140// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 6141// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3) 6142// CHECK: ret <4 x i32> [[VSLI_N2]] 6143int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { 6144 return vsliq_n_s32(a, b, 3); 6145} 6146 6147// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { 6148// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6149// CHECK: 
[[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6150// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6151// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 6152// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3) 6153// CHECK: ret <2 x i64> [[VSLI_N2]] 6154int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { 6155 return vsliq_n_s64(a, b, 3); 6156} 6157 6158// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { 6159// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 6160// CHECK: ret <8 x i8> [[VSLI_N]] 6161uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { 6162 return vsli_n_u8(a, b, 3); 6163} 6164 6165// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { 6166// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6167// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6168// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6169// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 6170// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3) 6171// CHECK: ret <4 x i16> [[VSLI_N2]] 6172uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { 6173 return vsli_n_u16(a, b, 3); 6174} 6175 6176// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { 6177// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6178// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6179// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6180// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 6181// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3) 6182// CHECK: ret <2 x i32> [[VSLI_N2]] 6183uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) 
{ 6184 return vsli_n_u32(a, b, 3); 6185} 6186 6187// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { 6188// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 6189// CHECK: ret <16 x i8> [[VSLI_N]] 6190uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { 6191 return vsliq_n_u8(a, b, 3); 6192} 6193 6194// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { 6195// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6196// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6197// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6198// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 6199// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3) 6200// CHECK: ret <8 x i16> [[VSLI_N2]] 6201uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { 6202 return vsliq_n_u16(a, b, 3); 6203} 6204 6205// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { 6206// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6207// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6208// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6209// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 6210// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3) 6211// CHECK: ret <4 x i32> [[VSLI_N2]] 6212uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { 6213 return vsliq_n_u32(a, b, 3); 6214} 6215 6216// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { 6217// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6218// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6219// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6220// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] 
to <2 x i64> 6221// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3) 6222// CHECK: ret <2 x i64> [[VSLI_N2]] 6223uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { 6224 return vsliq_n_u64(a, b, 3); 6225} 6226 6227// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 { 6228// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 6229// CHECK: ret <8 x i8> [[VSLI_N]] 6230poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { 6231 return vsli_n_p8(a, b, 3); 6232} 6233 6234// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 { 6235// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6236// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6237// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6238// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 6239// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15) 6240// CHECK: ret <4 x i16> [[VSLI_N2]] 6241poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { 6242 return vsli_n_p16(a, b, 15); 6243} 6244 6245// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 { 6246// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 6247// CHECK: ret <16 x i8> [[VSLI_N]] 6248poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { 6249 return vsliq_n_p8(a, b, 3); 6250} 6251 6252// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 { 6253// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6254// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6255// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6256// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 6257// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15) 6258// CHECK: ret <8 x i16> [[VSLI_N2]] 6259poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { 6260 return vsliq_n_p16(a, b, 15); 6261} 6262 6263// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 { 6264// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) 6265// CHECK: ret <8 x i8> [[VQSHLU_N]] 6266int8x8_t test_vqshlu_n_s8(int8x8_t a) { 6267 return vqshlu_n_s8(a, 3); 6268} 6269 6270// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 { 6271// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6272// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6273// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>) 6274// CHECK: ret <4 x i16> [[VQSHLU_N1]] 6275int16x4_t test_vqshlu_n_s16(int16x4_t a) { 6276 return vqshlu_n_s16(a, 3); 6277} 6278 6279// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 { 6280// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6281// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6282// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>) 6283// CHECK: ret <2 x i32> [[VQSHLU_N1]] 6284int32x2_t test_vqshlu_n_s32(int32x2_t a) { 6285 return vqshlu_n_s32(a, 3); 6286} 6287 6288// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 { 6289// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) 6290// CHECK: ret <16 x i8> [[VQSHLU_N]] 6291int8x16_t test_vqshluq_n_s8(int8x16_t a) { 6292 return vqshluq_n_s8(a, 3); 6293} 6294 6295// CHECK-LABEL: define <8 x i16> 
@test_vqshluq_n_s16(<8 x i16> %a) #0 { 6296// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6297// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6298// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) 6299// CHECK: ret <8 x i16> [[VQSHLU_N1]] 6300int16x8_t test_vqshluq_n_s16(int16x8_t a) { 6301 return vqshluq_n_s16(a, 3); 6302} 6303 6304// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 { 6305// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6306// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6307// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>) 6308// CHECK: ret <4 x i32> [[VQSHLU_N1]] 6309int32x4_t test_vqshluq_n_s32(int32x4_t a) { 6310 return vqshluq_n_s32(a, 3); 6311} 6312 6313// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 { 6314// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6315// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6316// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>) 6317// CHECK: ret <2 x i64> [[VQSHLU_N1]] 6318int64x2_t test_vqshluq_n_s64(int64x2_t a) { 6319 return vqshluq_n_s64(a, 3); 6320} 6321 6322// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 { 6323// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6324// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6325// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6326// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 6327// CHECK: ret <8 x i8> [[VSHRN_N]] 6328int8x8_t test_vshrn_n_s16(int16x8_t a) { 6329 return vshrn_n_s16(a, 3); 6330} 6331 6332// CHECK-LABEL: define <4 x i16> 
@test_vshrn_n_s32(<4 x i32> %a) #0 { 6333// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6334// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6335// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9> 6336// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 6337// CHECK: ret <4 x i16> [[VSHRN_N]] 6338int16x4_t test_vshrn_n_s32(int32x4_t a) { 6339 return vshrn_n_s32(a, 9); 6340} 6341 6342// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 { 6343// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6344// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6345// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19> 6346// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 6347// CHECK: ret <2 x i32> [[VSHRN_N]] 6348int32x2_t test_vshrn_n_s64(int64x2_t a) { 6349 return vshrn_n_s64(a, 19); 6350} 6351 6352// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 { 6353// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6354// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6355// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6356// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 6357// CHECK: ret <8 x i8> [[VSHRN_N]] 6358uint8x8_t test_vshrn_n_u16(uint16x8_t a) { 6359 return vshrn_n_u16(a, 3); 6360} 6361 6362// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 { 6363// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6364// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6365// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9> 6366// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 6367// CHECK: ret <4 x i16> [[VSHRN_N]] 6368uint16x4_t test_vshrn_n_u32(uint32x4_t a) { 6369 return vshrn_n_u32(a, 9); 6370} 6371 6372// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 
{ 6373// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6374// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6375// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19> 6376// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 6377// CHECK: ret <2 x i32> [[VSHRN_N]] 6378uint32x2_t test_vshrn_n_u64(uint64x2_t a) { 6379 return vshrn_n_u64(a, 19); 6380} 6381 6382// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { 6383// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6384// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6385// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6386// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 6387// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6388// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6389int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { 6390 return vshrn_high_n_s16(a, b, 3); 6391} 6392 6393// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { 6394// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6395// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6396// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9> 6397// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 6398// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6399// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6400int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { 6401 return vshrn_high_n_s32(a, b, 9); 6402} 6403 6404// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { 6405// CHECK: [[TMP0:%.*]] = bitcast <2 x 
i64> %b to <16 x i8> 6406// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6407// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19> 6408// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 6409// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6410// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6411int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { 6412 return vshrn_high_n_s64(a, b, 19); 6413} 6414 6415// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { 6416// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6417// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6418// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6419// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 6420// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6421// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6422uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 6423 return vshrn_high_n_u16(a, b, 3); 6424} 6425 6426// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { 6427// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6428// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6429// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9> 6430// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 6431// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6432// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6433uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 6434 return vshrn_high_n_u32(a, b, 9); 6435} 6436 6437// CHECK-LABEL: 
define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { 6438// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6439// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6440// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19> 6441// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 6442// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6443// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6444uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 6445 return vshrn_high_n_u64(a, b, 19); 6446} 6447 6448// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 { 6449// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6450// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6451// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) 6452// CHECK: ret <8 x i8> [[VQSHRUN_N1]] 6453int8x8_t test_vqshrun_n_s16(int16x8_t a) { 6454 return vqshrun_n_s16(a, 3); 6455} 6456 6457// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 { 6458// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6459// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6460// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) 6461// CHECK: ret <4 x i16> [[VQSHRUN_N1]] 6462int16x4_t test_vqshrun_n_s32(int32x4_t a) { 6463 return vqshrun_n_s32(a, 9); 6464} 6465 6466// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 { 6467// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6468// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6469// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) 6470// CHECK: ret <2 x i32> [[VQSHRUN_N1]] 6471int32x2_t test_vqshrun_n_s64(int64x2_t a) { 6472 return vqshrun_n_s64(a, 
19); 6473} 6474 6475// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { 6476// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6477// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6478// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) 6479// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6480// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6481int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) { 6482 return vqshrun_high_n_s16(a, b, 3); 6483} 6484 6485// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { 6486// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6487// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6488// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) 6489// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6490// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6491int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) { 6492 return vqshrun_high_n_s32(a, b, 9); 6493} 6494 6495// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { 6496// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6497// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6498// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) 6499// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6500// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6501int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) { 6502 return 
vqshrun_high_n_s64(a, b, 19); 6503} 6504 6505// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 { 6506// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6507// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6508// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 6509// CHECK: ret <8 x i8> [[VRSHRN_N1]] 6510int8x8_t test_vrshrn_n_s16(int16x8_t a) { 6511 return vrshrn_n_s16(a, 3); 6512} 6513 6514// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 { 6515// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6516// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6517// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 6518// CHECK: ret <4 x i16> [[VRSHRN_N1]] 6519int16x4_t test_vrshrn_n_s32(int32x4_t a) { 6520 return vrshrn_n_s32(a, 9); 6521} 6522 6523// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 { 6524// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6525// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6526// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 6527// CHECK: ret <2 x i32> [[VRSHRN_N1]] 6528int32x2_t test_vrshrn_n_s64(int64x2_t a) { 6529 return vrshrn_n_s64(a, 19); 6530} 6531 6532// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 { 6533// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6534// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6535// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 6536// CHECK: ret <8 x i8> [[VRSHRN_N1]] 6537uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { 6538 return vrshrn_n_u16(a, 3); 6539} 6540 6541// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 { 6542// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6543// 
CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6544// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 6545// CHECK: ret <4 x i16> [[VRSHRN_N1]] 6546uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { 6547 return vrshrn_n_u32(a, 9); 6548} 6549 6550// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 { 6551// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6552// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6553// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 6554// CHECK: ret <2 x i32> [[VRSHRN_N1]] 6555uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { 6556 return vrshrn_n_u64(a, 19); 6557} 6558 6559// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { 6560// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6561// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6562// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 6563// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6564// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6565int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) { 6566 return vrshrn_high_n_s16(a, b, 3); 6567} 6568 6569// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { 6570// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6571// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6572// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 6573// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6574// CHECK: ret <8 x 
i16> [[SHUFFLE_I]] 6575int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) { 6576 return vrshrn_high_n_s32(a, b, 9); 6577} 6578 6579// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { 6580// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6581// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6582// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 6583// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6584// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6585int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) { 6586 return vrshrn_high_n_s64(a, b, 19); 6587} 6588 6589// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { 6590// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6591// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6592// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 6593// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6594// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6595uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 6596 return vrshrn_high_n_u16(a, b, 3); 6597} 6598 6599// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { 6600// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6601// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6602// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 6603// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6604// CHECK: ret <8 x i16> 
[[SHUFFLE_I]] 6605uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 6606 return vrshrn_high_n_u32(a, b, 9); 6607} 6608 6609// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { 6610// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6611// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6612// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 6613// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6614// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6615uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 6616 return vrshrn_high_n_u64(a, b, 19); 6617} 6618 6619// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 { 6620// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6621// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6622// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) 6623// CHECK: ret <8 x i8> [[VQRSHRUN_N1]] 6624int8x8_t test_vqrshrun_n_s16(int16x8_t a) { 6625 return vqrshrun_n_s16(a, 3); 6626} 6627 6628// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 { 6629// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6630// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6631// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) 6632// CHECK: ret <4 x i16> [[VQRSHRUN_N1]] 6633int16x4_t test_vqrshrun_n_s32(int32x4_t a) { 6634 return vqrshrun_n_s32(a, 9); 6635} 6636 6637// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 { 6638// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6639// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6640// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) 6641// CHECK: ret <2 x i32> [[VQRSHRUN_N1]] 6642int32x2_t test_vqrshrun_n_s64(int64x2_t a) { 6643 return vqrshrun_n_s64(a, 19); 6644} 6645 6646// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { 6647// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6648// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6649// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) 6650// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6651// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6652int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) { 6653 return vqrshrun_high_n_s16(a, b, 3); 6654} 6655 6656// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { 6657// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6658// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6659// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) 6660// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6661// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6662int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) { 6663 return vqrshrun_high_n_s32(a, b, 9); 6664} 6665 6666// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { 6667// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6668// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6669// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) 6670// CHECK: [[SHUFFLE_I:%.*]] = 
shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6671// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6672int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) { 6673 return vqrshrun_high_n_s64(a, b, 19); 6674} 6675 6676// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 { 6677// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6678// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6679// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6680// CHECK: ret <8 x i8> [[VQSHRN_N1]] 6681int8x8_t test_vqshrn_n_s16(int16x8_t a) { 6682 return vqshrn_n_s16(a, 3); 6683} 6684 6685// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 { 6686// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6687// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6688// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6689// CHECK: ret <4 x i16> [[VQSHRN_N1]] 6690int16x4_t test_vqshrn_n_s32(int32x4_t a) { 6691 return vqshrn_n_s32(a, 9); 6692} 6693 6694// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 { 6695// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6696// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6697// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6698// CHECK: ret <2 x i32> [[VQSHRN_N1]] 6699int32x2_t test_vqshrn_n_s64(int64x2_t a) { 6700 return vqshrn_n_s64(a, 19); 6701} 6702 6703// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 { 6704// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6705// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6706// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6707// CHECK: ret <8 x i8> [[VQSHRN_N1]] 6708uint8x8_t 
test_vqshrn_n_u16(uint16x8_t a) { 6709 return vqshrn_n_u16(a, 3); 6710} 6711 6712// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 { 6713// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6714// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6715// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6716// CHECK: ret <4 x i16> [[VQSHRN_N1]] 6717uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { 6718 return vqshrn_n_u32(a, 9); 6719} 6720 6721// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 { 6722// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6723// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6724// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6725// CHECK: ret <2 x i32> [[VQSHRN_N1]] 6726uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { 6727 return vqshrn_n_u64(a, 19); 6728} 6729 6730// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { 6731// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6732// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6733// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6734// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6735// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6736int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) { 6737 return vqshrn_high_n_s16(a, b, 3); 6738} 6739 6740// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { 6741// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6742// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6743// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6744// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6745// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6746int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) { 6747 return vqshrn_high_n_s32(a, b, 9); 6748} 6749 6750// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { 6751// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6752// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6753// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6754// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6755// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6756int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) { 6757 return vqshrn_high_n_s64(a, b, 19); 6758} 6759 6760// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { 6761// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6762// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6763// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6764// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6765// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6766uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 6767 return vqshrn_high_n_u16(a, b, 3); 6768} 6769 6770// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { 6771// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6772// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6773// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6774// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6775// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6776uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 6777 return vqshrn_high_n_u32(a, b, 9); 6778} 6779 6780// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { 6781// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6782// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6783// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6784// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6785// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6786uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 6787 return vqshrn_high_n_u64(a, b, 19); 6788} 6789 6790// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 { 6791// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6792// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6793// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6794// CHECK: ret <8 x i8> [[VQRSHRN_N1]] 6795int8x8_t test_vqrshrn_n_s16(int16x8_t a) { 6796 return vqrshrn_n_s16(a, 3); 6797} 6798 6799// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 { 6800// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6801// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6802// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6803// CHECK: ret <4 x i16> [[VQRSHRN_N1]] 6804int16x4_t test_vqrshrn_n_s32(int32x4_t a) { 6805 return vqrshrn_n_s32(a, 9); 6806} 6807 6808// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> 
%a) #0 { 6809// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6810// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6811// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6812// CHECK: ret <2 x i32> [[VQRSHRN_N1]] 6813int32x2_t test_vqrshrn_n_s64(int64x2_t a) { 6814 return vqrshrn_n_s64(a, 19); 6815} 6816 6817// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 { 6818// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6819// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6820// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6821// CHECK: ret <8 x i8> [[VQRSHRN_N1]] 6822uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { 6823 return vqrshrn_n_u16(a, 3); 6824} 6825 6826// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 { 6827// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6828// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6829// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6830// CHECK: ret <4 x i16> [[VQRSHRN_N1]] 6831uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { 6832 return vqrshrn_n_u32(a, 9); 6833} 6834 6835// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 { 6836// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6837// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6838// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6839// CHECK: ret <2 x i32> [[VQRSHRN_N1]] 6840uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { 6841 return vqrshrn_n_u64(a, 19); 6842} 6843 6844// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 { 6845// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6846// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> 
[[TMP0]] to <8 x i16> 6847// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6848// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6849// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6850int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) { 6851 return vqrshrn_high_n_s16(a, b, 3); 6852} 6853 6854// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 { 6855// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6856// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6857// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6858// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6859// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6860int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) { 6861 return vqrshrn_high_n_s32(a, b, 9); 6862} 6863 6864// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 { 6865// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6866// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6867// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6868// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6869// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6870int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) { 6871 return vqrshrn_high_n_s64(a, b, 19); 6872} 6873 6874// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 { 6875// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6876// CHECK: [[VQRSHRN_N:%.*]] = 
bitcast <16 x i8> [[TMP0]] to <8 x i16> 6877// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6878// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6879// CHECK: ret <16 x i8> [[SHUFFLE_I]] 6880uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 6881 return vqrshrn_high_n_u16(a, b, 3); 6882} 6883 6884// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 { 6885// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6886// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6887// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6888// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6889// CHECK: ret <8 x i16> [[SHUFFLE_I]] 6890uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 6891 return vqrshrn_high_n_u32(a, b, 9); 6892} 6893 6894// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 { 6895// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6896// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6897// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6898// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6899// CHECK: ret <4 x i32> [[SHUFFLE_I]] 6900uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 6901 return vqrshrn_high_n_u64(a, b, 19); 6902} 6903 6904// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { 6905// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> 6906// CHECK: [[VSHLL_N:%.*]] = 
shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6907// CHECK: ret <8 x i16> [[VSHLL_N]] 6908int16x8_t test_vshll_n_s8(int8x8_t a) { 6909 return vshll_n_s8(a, 3); 6910} 6911 6912// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { 6913// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6914// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6915// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 6916// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9> 6917// CHECK: ret <4 x i32> [[VSHLL_N]] 6918int32x4_t test_vshll_n_s16(int16x4_t a) { 6919 return vshll_n_s16(a, 9); 6920} 6921 6922// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { 6923// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6924// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6925// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 6926// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19> 6927// CHECK: ret <2 x i64> [[VSHLL_N]] 6928int64x2_t test_vshll_n_s32(int32x2_t a) { 6929 return vshll_n_s32(a, 19); 6930} 6931 6932// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { 6933// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> 6934// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6935// CHECK: ret <8 x i16> [[VSHLL_N]] 6936uint16x8_t test_vshll_n_u8(uint8x8_t a) { 6937 return vshll_n_u8(a, 3); 6938} 6939 6940// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { 6941// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6942// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6943// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 6944// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9> 6945// CHECK: ret <4 x i32> [[VSHLL_N]] 6946uint32x4_t test_vshll_n_u16(uint16x4_t a) { 6947 return 
vshll_n_u16(a, 9); 6948} 6949 6950// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { 6951// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6952// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6953// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 6954// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19> 6955// CHECK: ret <2 x i64> [[VSHLL_N]] 6956uint64x2_t test_vshll_n_u32(uint32x2_t a) { 6957 return vshll_n_u32(a, 19); 6958} 6959 6960// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { 6961// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6962// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> 6963// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6964// CHECK: ret <8 x i16> [[VSHLL_N]] 6965int16x8_t test_vshll_high_n_s8(int8x16_t a) { 6966 return vshll_high_n_s8(a, 3); 6967} 6968 6969// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { 6970// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6971// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 6972// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6973// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 6974// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9> 6975// CHECK: ret <4 x i32> [[VSHLL_N]] 6976int32x4_t test_vshll_high_n_s16(int16x8_t a) { 6977 return vshll_high_n_s16(a, 9); 6978} 6979 6980// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { 6981// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6982// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 6983// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 
6984// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 6985// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19> 6986// CHECK: ret <2 x i64> [[VSHLL_N]] 6987int64x2_t test_vshll_high_n_s32(int32x4_t a) { 6988 return vshll_high_n_s32(a, 19); 6989} 6990 6991// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { 6992// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6993// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> 6994// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 6995// CHECK: ret <8 x i16> [[VSHLL_N]] 6996uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { 6997 return vshll_high_n_u8(a, 3); 6998} 6999 7000// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { 7001// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7002// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 7003// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7004// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 7005// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9> 7006// CHECK: ret <4 x i32> [[VSHLL_N]] 7007uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { 7008 return vshll_high_n_u16(a, 9); 7009} 7010 7011// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { 7012// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 7013// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 7014// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7015// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 7016// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19> 7017// CHECK: ret <2 x i64> [[VSHLL_N]] 7018uint64x2_t test_vshll_high_n_u32(uint32x4_t 
a) { 7019 return vshll_high_n_u32(a, 19); 7020} 7021 7022// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 { 7023// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16> 7024// CHECK: ret <8 x i16> [[VMOVL_I]] 7025int16x8_t test_vmovl_s8(int8x8_t a) { 7026 return vmovl_s8(a); 7027} 7028 7029// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 { 7030// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 7031// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7032// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 7033// CHECK: ret <4 x i32> [[VMOVL_I]] 7034int32x4_t test_vmovl_s16(int16x4_t a) { 7035 return vmovl_s16(a); 7036} 7037 7038// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 { 7039// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7040// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7041// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 7042// CHECK: ret <2 x i64> [[VMOVL_I]] 7043int64x2_t test_vmovl_s32(int32x2_t a) { 7044 return vmovl_s32(a); 7045} 7046 7047// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 { 7048// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16> 7049// CHECK: ret <8 x i16> [[VMOVL_I]] 7050uint16x8_t test_vmovl_u8(uint8x8_t a) { 7051 return vmovl_u8(a); 7052} 7053 7054// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 { 7055// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 7056// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7057// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 7058// CHECK: ret <4 x i32> [[VMOVL_I]] 7059uint32x4_t test_vmovl_u16(uint16x4_t a) { 7060 return vmovl_u16(a); 7061} 7062 7063// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 { 7064// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7065// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7066// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x 
i64> 7067// CHECK: ret <2 x i64> [[VMOVL_I]] 7068uint64x2_t test_vmovl_u32(uint32x2_t a) { 7069 return vmovl_u32(a); 7070} 7071 7072// CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 { 7073// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7074// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> 7075// CHECK: ret <8 x i16> [[TMP0]] 7076int16x8_t test_vmovl_high_s8(int8x16_t a) { 7077 return vmovl_high_s8(a); 7078} 7079 7080// CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 { 7081// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7082// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 7083// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7084// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 7085// CHECK: ret <4 x i32> [[TMP2]] 7086int32x4_t test_vmovl_high_s16(int16x8_t a) { 7087 return vmovl_high_s16(a); 7088} 7089 7090// CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 { 7091// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 7092// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 7093// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7094// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 7095// CHECK: ret <2 x i64> [[TMP2]] 7096int64x2_t test_vmovl_high_s32(int32x4_t a) { 7097 return vmovl_high_s32(a); 7098} 7099 7100// CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 { 7101// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7102// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> 7103// CHECK: ret <8 x i16> [[TMP0]] 7104uint16x8_t test_vmovl_high_u8(uint8x16_t a) { 7105 return 
vmovl_high_u8(a); 7106} 7107 7108// CHECK-LABEL: define <4 x i32> @test_vmovl_high_u16(<8 x i16> %a) #0 { 7109// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7110// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 7111// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7112// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 7113// CHECK: ret <4 x i32> [[TMP2]] 7114uint32x4_t test_vmovl_high_u16(uint16x8_t a) { 7115 return vmovl_high_u16(a); 7116} 7117 7118// CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 { 7119// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 7120// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 7121// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7122// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 7123// CHECK: ret <2 x i64> [[TMP2]] 7124uint64x2_t test_vmovl_high_u32(uint32x4_t a) { 7125 return vmovl_high_u32(a); 7126} 7127 7128// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 { 7129// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7130// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7131// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) 7132// CHECK: ret <2 x float> [[VCVT_N1]] 7133float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { 7134 return vcvt_n_f32_s32(a, 31); 7135} 7136 7137// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 { 7138// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7139// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 7140// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) 7141// CHECK: ret <4 x float> [[VCVT_N1]] 7142float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { 7143 return 
vcvtq_n_f32_s32(a, 31); 7144} 7145 7146// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 { 7147// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7148// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 7149// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) 7150// CHECK: ret <2 x double> [[VCVT_N1]] 7151float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) { 7152 return vcvtq_n_f64_s64(a, 50); 7153} 7154 7155// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 { 7156// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7157// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7158// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) 7159// CHECK: ret <2 x float> [[VCVT_N1]] 7160float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { 7161 return vcvt_n_f32_u32(a, 31); 7162} 7163 7164// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 { 7165// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7166// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 7167// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) 7168// CHECK: ret <4 x float> [[VCVT_N1]] 7169float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { 7170 return vcvtq_n_f32_u32(a, 31); 7171} 7172 7173// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) #0 { 7174// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7175// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 7176// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) 7177// CHECK: ret <2 x double> [[VCVT_N1]] 7178float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { 7179 return vcvtq_n_f64_u64(a, 50); 7180} 7181 7182// CHECK-LABEL: define <2 x i32> 
@test_vcvt_n_s32_f32(<2 x float> %a) #0 { 7183// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 7184// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 7185// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) 7186// CHECK: ret <2 x i32> [[VCVT_N1]] 7187int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { 7188 return vcvt_n_s32_f32(a, 31); 7189} 7190 7191// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 { 7192// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 7193// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 7194// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) 7195// CHECK: ret <4 x i32> [[VCVT_N1]] 7196int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { 7197 return vcvtq_n_s32_f32(a, 31); 7198} 7199 7200// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 { 7201// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 7202// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 7203// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) 7204// CHECK: ret <2 x i64> [[VCVT_N1]] 7205int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { 7206 return vcvtq_n_s64_f64(a, 50); 7207} 7208 7209// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 { 7210// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 7211// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 7212// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) 7213// CHECK: ret <2 x i32> [[VCVT_N1]] 7214uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { 7215 return vcvt_n_u32_f32(a, 31); 7216} 7217 7218// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 { 7219// CHECK: [[TMP0:%.*]] = bitcast <4 x float> 
%a to <16 x i8> 7220// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 7221// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) 7222// CHECK: ret <4 x i32> [[VCVT_N1]] 7223uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { 7224 return vcvtq_n_u32_f32(a, 31); 7225} 7226 7227// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 { 7228// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 7229// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 7230// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) 7231// CHECK: ret <2 x i64> [[VCVT_N1]] 7232uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { 7233 return vcvtq_n_u64_f64(a, 50); 7234} 7235 7236// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 7237// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> 7238// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> 7239// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 7240// CHECK: ret <8 x i16> [[ADD_I]] 7241int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { 7242 return vaddl_s8(a, b); 7243} 7244 7245// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 7246// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 7247// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7248// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 7249// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7250// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 7251// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> 7252// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 7253// CHECK: ret <4 x i32> [[ADD_I]] 7254int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { 7255 return vaddl_s16(a, b); 7256} 7257 7258// CHECK-LABEL: define <2 x i64> 
@test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 7259// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7260// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7261// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 7262// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7263// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 7264// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> 7265// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 7266// CHECK: ret <2 x i64> [[ADD_I]] 7267int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { 7268 return vaddl_s32(a, b); 7269} 7270 7271// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 7272// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> 7273// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> 7274// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 7275// CHECK: ret <8 x i16> [[ADD_I]] 7276uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { 7277 return vaddl_u8(a, b); 7278} 7279 7280// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 7281// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 7282// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7283// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 7284// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7285// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 7286// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 7287// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 7288// CHECK: ret <4 x i32> [[ADD_I]] 7289uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { 7290 return vaddl_u16(a, b); 7291} 7292 7293// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 7294// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7295// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 
7296// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 7297// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7298// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 7299// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 7300// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 7301// CHECK: ret <2 x i64> [[ADD_I]] 7302uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { 7303 return vaddl_u32(a, b); 7304} 7305 7306// CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 { 7307// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7308// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 7309// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7310// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> 7311// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] 7312// CHECK: ret <8 x i16> [[ADD_I]] 7313int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) { 7314 return vaddl_high_s8(a, b); 7315} 7316 7317// CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 { 7318// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7319// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 7320// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7321// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 7322// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7323// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> 7324// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> 7325// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> 
7326// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] 7327// CHECK: ret <4 x i32> [[ADD_I]] 7328int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { 7329 return vaddl_high_s16(a, b); 7330} 7331 7332// CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 { 7333// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 7334// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 7335// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7336// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 7337// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 7338// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> 7339// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 7340// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> 7341// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] 7342// CHECK: ret <2 x i64> [[ADD_I]] 7343int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { 7344 return vaddl_high_s32(a, b); 7345} 7346 7347// CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 { 7348// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7349// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 7350// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7351// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> 7352// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] 7353// CHECK: ret <8 x i16> [[ADD_I]] 7354uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { 7355 return vaddl_high_u8(a, b); 7356} 7357 7358// CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 { 7359// 
CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7360// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 7361// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7362// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 7363// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7364// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> 7365// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> 7366// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> 7367// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] 7368// CHECK: ret <4 x i32> [[ADD_I]] 7369uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { 7370 return vaddl_high_u16(a, b); 7371} 7372 7373// CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 { 7374// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 7375// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 7376// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7377// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 7378// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 7379// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> 7380// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 7381// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> 7382// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] 7383// CHECK: ret <2 x i64> [[ADD_I]] 7384uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { 7385 return vaddl_high_u32(a, b); 7386} 7387 7388// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 { 7389// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> 7390// CHECK: [[ADD_I:%.*]] = add <8 x i16> 
%a, [[VMOVL_I_I]] 7391// CHECK: ret <8 x i16> [[ADD_I]] 7392int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { 7393 return vaddw_s8(a, b); 7394} 7395 7396// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 { 7397// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7398// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7399// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 7400// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] 7401// CHECK: ret <4 x i32> [[ADD_I]] 7402int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { 7403 return vaddw_s16(a, b); 7404} 7405 7406// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 { 7407// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7408// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 7409// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 7410// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] 7411// CHECK: ret <2 x i64> [[ADD_I]] 7412int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { 7413 return vaddw_s32(a, b); 7414} 7415 7416// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 { 7417// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> 7418// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] 7419// CHECK: ret <8 x i16> [[ADD_I]] 7420uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { 7421 return vaddw_u8(a, b); 7422} 7423 7424// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 { 7425// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7426// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 7427// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 7428// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] 7429// CHECK: ret <4 x i32> [[ADD_I]] 7430uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { 7431 return vaddw_u16(a, b); 7432} 7433 7434// CHECK-LABEL: define <2 x i64> 
@test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}

// CHECK-LABEL: define <8 x i16>
@test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
//
CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8>
%b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK:
[[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

//
CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:
[[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t
test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b,
<8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:
[[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
return vaddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8,
i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK:
[[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32>
[[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b)
#0 { 7950// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7951// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7952// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 7953// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 7954// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4 7955// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> 7956// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16> 7957// CHECK: ret <4 x i16> [[TMP2]] 7958int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { 7959 return vraddhn_s32(a, b); 7960} 7961 7962// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 7963// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7964// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7965// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 7966// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 7967// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4 7968// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> 7969// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32> 7970// CHECK: ret <2 x i32> [[TMP2]] 7971int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { 7972 return vraddhn_s64(a, b); 7973} 7974 7975// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 7976// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7977// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7978// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 7979// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 7980// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> 
[[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4 7981// CHECK: ret <8 x i8> [[VRADDHN_V2_I]] 7982uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { 7983 return vraddhn_u16(a, b); 7984} 7985 7986// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 7987// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7988// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7989// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 7990// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 7991// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4 7992// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> 7993// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16> 7994// CHECK: ret <4 x i16> [[TMP2]] 7995uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { 7996 return vraddhn_u32(a, b); 7997} 7998 7999// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 8000// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8001// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8002// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8003// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8004// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4 8005// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> 8006// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32> 8007// CHECK: ret <2 x i32> [[TMP2]] 8008uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { 8009 return vraddhn_u64(a, b); 8010} 8011 8012// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8013// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8014// CHECK: 
[[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8015// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8016// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8017// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4 8018// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8019// CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8020int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 8021 return vraddhn_high_s16(r, a, b); 8022} 8023 8024// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8025// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8026// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8027// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8028// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8029// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4 8030// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> 8031// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16> 8032// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8033// CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 8034int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 8035 return vraddhn_high_s32(r, a, b); 8036} 8037 8038// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8039// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8040// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x 
i8> 8041// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8042// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8043// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4 8044// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> 8045// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32> 8046// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8047// CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8048int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 8049 return vraddhn_high_s64(r, a, b); 8050} 8051 8052// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8053// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8054// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8055// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8056// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8057// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4 8058// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8059// CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8060uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 8061 return vraddhn_high_u16(r, a, b); 8062} 8063 8064// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8065// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8066// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8067// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] 
to <4 x i32> 8068// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8069// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4 8070// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> 8071// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16> 8072// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8073// CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 8074uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 8075 return vraddhn_high_u32(r, a, b); 8076} 8077 8078// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8079// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8080// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8081// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8082// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8083// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4 8084// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> 8085// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32> 8086// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8087// CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8088uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 8089 return vraddhn_high_u64(r, a, b); 8090} 8091 8092// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { 8093// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8094// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8095// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> 
[[TMP0]] to <8 x i16> 8096// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8097// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 8098// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 8099// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 8100// CHECK: ret <8 x i8> [[VSUBHN2_I]] 8101int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { 8102 return vsubhn_s16(a, b); 8103} 8104 8105// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 8106// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8107// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8108// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8109// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8110// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 8111// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16> 8112// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 8113// CHECK: ret <4 x i16> [[VSUBHN2_I]] 8114int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { 8115 return vsubhn_s32(a, b); 8116} 8117 8118// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 8119// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8120// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8121// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8122// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8123// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 8124// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32> 8125// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> 8126// CHECK: ret <2 x i32> [[VSUBHN2_I]] 8127int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { 8128 return vsubhn_s64(a, b); 8129} 8130 8131// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x 
i16> %a, <8 x i16> %b) #0 { 8132// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8133// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8134// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8135// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8136// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 8137// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 8138// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 8139// CHECK: ret <8 x i8> [[VSUBHN2_I]] 8140uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { 8141 return vsubhn_u16(a, b); 8142} 8143 8144// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 8145// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8146// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8147// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8148// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8149// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 8150// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16> 8151// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 8152// CHECK: ret <4 x i16> [[VSUBHN2_I]] 8153uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { 8154 return vsubhn_u32(a, b); 8155} 8156 8157// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 8158// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8159// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8160// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8161// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8162// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 8163// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32> 8164// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x 
i32> 8165// CHECK: ret <2 x i32> [[VSUBHN2_I]] 8166uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { 8167 return vsubhn_u64(a, b); 8168} 8169 8170// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8171// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8172// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8173// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8174// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8175// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 8176// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 8177// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> 8178// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8179// CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8180int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 8181 return vsubhn_high_s16(r, a, b); 8182} 8183 8184// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8185// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8186// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8187// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8188// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8189// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 8190// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16> 8191// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> 8192// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8193// CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 
8194int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 8195 return vsubhn_high_s32(r, a, b); 8196} 8197 8198// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8199// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8200// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8201// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8202// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8203// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 8204// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32> 8205// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> 8206// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8207// CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8208int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 8209 return vsubhn_high_s64(r, a, b); 8210} 8211 8212// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8213// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8214// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8215// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8216// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8217// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 8218// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 8219// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> 8220// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8221// CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8222uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, 
uint16x8_t b) { 8223 return vsubhn_high_u16(r, a, b); 8224} 8225 8226// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8227// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8228// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8229// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8230// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8231// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 8232// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16> 8233// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> 8234// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8235// CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 8236uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 8237 return vsubhn_high_u32(r, a, b); 8238} 8239 8240// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8241// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8242// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8243// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8244// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8245// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 8246// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32> 8247// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> 8248// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8249// CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8250uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 8251 return vsubhn_high_u64(r, a, b); 8252} 8253 8254// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> 
%b) #0 { 8255// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8256// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8257// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8258// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8259// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4 8260// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] 8261int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { 8262 return vrsubhn_s16(a, b); 8263} 8264 8265// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 8266// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8267// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8268// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8269// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8270// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4 8271// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> 8272// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16> 8273// CHECK: ret <4 x i16> [[TMP2]] 8274int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { 8275 return vrsubhn_s32(a, b); 8276} 8277 8278// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 8279// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8280// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8281// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8282// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8283// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4 8284// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> 8285// CHECK: [[TMP2:%.*]] 
= bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32> 8286// CHECK: ret <2 x i32> [[TMP2]] 8287int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { 8288 return vrsubhn_s64(a, b); 8289} 8290 8291// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 8292// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8293// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8294// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8295// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8296// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4 8297// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] 8298uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { 8299 return vrsubhn_u16(a, b); 8300} 8301 8302// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 8303// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8304// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8305// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8306// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8307// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4 8308// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> 8309// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16> 8310// CHECK: ret <4 x i16> [[TMP2]] 8311uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { 8312 return vrsubhn_u32(a, b); 8313} 8314 8315// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 8316// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8317// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8318// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8319// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> 
[[TMP1]] to <2 x i64> 8320// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4 8321// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> 8322// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32> 8323// CHECK: ret <2 x i32> [[TMP2]] 8324uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { 8325 return vrsubhn_u64(a, b); 8326} 8327 8328// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8329// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8330// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8331// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8332// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8333// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4 8334// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8335// CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8336int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 8337 return vrsubhn_high_s16(r, a, b); 8338} 8339 8340// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8341// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8342// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8343// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8344// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8345// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4 8346// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> 
[[VRSUBHN_V2_I_I]] to <8 x i8> 8347// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16> 8348// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8349// CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 8350int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 8351 return vrsubhn_high_s32(r, a, b); 8352} 8353 8354// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8355// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8356// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8357// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8358// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8359// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4 8360// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> 8361// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32> 8362// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8363// CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8364int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 8365 return vrsubhn_high_s64(r, a, b); 8366} 8367 8368// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8369// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8370// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8371// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8372// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8373// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4 8374// CHECK: [[SHUFFLE_I_I:%.*]] = 
shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8375// CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8376uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 8377 return vrsubhn_high_u16(r, a, b); 8378} 8379 8380// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8381// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8382// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8383// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8384// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8385// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4 8386// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> 8387// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16> 8388// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8389// CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 8390uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 8391 return vrsubhn_high_u32(r, a, b); 8392} 8393 8394// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8395// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8396// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8397// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8398// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8399// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4 8400// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> 
[[VRSUBHN_V2_I_I]] to <8 x i8> 8401// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32> 8402// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8403// CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8404uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 8405 return vrsubhn_high_u64(r, a, b); 8406} 8407 8408// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 8409// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8410// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> 8411// CHECK: ret <8 x i16> [[VMOVL_I_I]] 8412int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { 8413 return vabdl_s8(a, b); 8414} 8415// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 8416// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8417// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8418// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8419// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8420// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4 8421// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> 8422// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8423// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8424// CHECK: ret <4 x i32> [[VMOVL_I_I]] 8425int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { 8426 return vabdl_s16(a, b); 8427} 8428// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 8429// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8430// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8431// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8432// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8433// CHECK: 
[[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4 8434// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> 8435// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8436// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8437// CHECK: ret <2 x i64> [[VMOVL_I_I]] 8438int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { 8439 return vabdl_s32(a, b); 8440} 8441// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 8442// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8443// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> 8444// CHECK: ret <8 x i16> [[VMOVL_I_I]] 8445uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { 8446 return vabdl_u8(a, b); 8447} 8448// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 8449// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8450// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8451// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8452// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8453// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4 8454// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> 8455// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8456// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8457// CHECK: ret <4 x i32> [[VMOVL_I_I]] 8458uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { 8459 return vabdl_u16(a, b); 8460} 8461// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 8462// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8463// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8464// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8465// CHECK: 
[[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8466// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4 8467// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> 8468// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8469// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8470// CHECK: ret <2 x i64> [[VMOVL_I_I]] 8471uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { 8472 return vabdl_u32(a, b); 8473} 8474 8475// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 8476// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4 8477// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 8478// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 8479// CHECK: ret <8 x i16> [[ADD_I]] 8480int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { 8481 return vabal_s8(a, b, c); 8482} 8483// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 8484// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8485// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8486// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8487// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8488// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 8489// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> 8490// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8491// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8492// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] 8493// CHECK: ret <4 x i32> [[ADD_I]] 8494int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 8495 return vabal_s16(a, b, c); 8496} 8497// CHECK-LABEL: define <2 x 
i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 8498// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8499// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8500// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8501// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8502// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 8503// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> 8504// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8505// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8506// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] 8507// CHECK: ret <2 x i64> [[ADD_I]] 8508int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 8509 return vabal_s32(a, b, c); 8510} 8511// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 8512// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4 8513// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 8514// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 8515// CHECK: ret <8 x i16> [[ADD_I]] 8516uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { 8517 return vabal_u8(a, b, c); 8518} 8519// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 8520// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8521// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8522// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8523// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8524// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 8525// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> 8526// CHECK: 
[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8527// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8528// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] 8529// CHECK: ret <4 x i32> [[ADD_I]] 8530uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { 8531 return vabal_u16(a, b, c); 8532} 8533// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 8534// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8535// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8536// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8537// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8538// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 8539// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> 8540// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8541// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8542// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] 8543// CHECK: ret <2 x i64> [[ADD_I]] 8544uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { 8545 return vabal_u32(a, b, c); 8546} 8547 8548// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 { 8549// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8550// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8551// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8552// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 8553// CHECK: ret <8 x i16> [[VMOVL_I_I_I]] 8554int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) { 
8555 return vabdl_high_s8(a, b); 8556} 8557// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 { 8558// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8559// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8560// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8561// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8562// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8563// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8564// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 8565// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> 8566// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8567// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8568// CHECK: ret <4 x i32> [[VMOVL_I_I_I]] 8569int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) { 8570 return vabdl_high_s16(a, b); 8571} 8572// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 { 8573// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 8574// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8575// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8576// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8577// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8578// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8579// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 8580// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> 8581// CHECK: 
[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8582// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8583// CHECK: ret <2 x i64> [[VMOVL_I_I_I]] 8584int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) { 8585 return vabdl_high_s32(a, b); 8586} 8587// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 { 8588// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8589// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8590// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8591// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 8592// CHECK: ret <8 x i16> [[VMOVL_I_I_I]] 8593uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) { 8594 return vabdl_high_u8(a, b); 8595} 8596// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 { 8597// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8598// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8599// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8600// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8601// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8602// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8603// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 8604// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> 8605// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8606// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x 
i32> 8607// CHECK: ret <4 x i32> [[VMOVL_I_I_I]] 8608uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) { 8609 return vabdl_high_u16(a, b); 8610} 8611// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 { 8612// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 8613// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8614// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8615// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8616// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8617// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8618// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 8619// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> 8620// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8621// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8622// CHECK: ret <2 x i64> [[VMOVL_I_I_I]] 8623uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) { 8624 return vabdl_high_u32(a, b); 8625} 8626 8627// CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { 8628// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8629// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8630// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8631// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16> 8632// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]] 8633// CHECK: ret <8 x i16> [[ADD_I_I]] 
8634int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { 8635 return vabal_high_s8(a, b, c); 8636} 8637// CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { 8638// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8639// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8640// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8641// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8642// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8643// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8644// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4 8645// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> 8646// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8647// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8648// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] 8649// CHECK: ret <4 x i32> [[ADD_I_I]] 8650int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { 8651 return vabal_high_s16(a, b, c); 8652} 8653// CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { 8654// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8655// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8656// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8657// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8658// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8659// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8660// CHECK: 
[[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4 8661// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> 8662// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8663// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8664// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] 8665// CHECK: ret <2 x i64> [[ADD_I_I]] 8666int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { 8667 return vabal_high_s32(a, b, c); 8668} 8669// CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { 8670// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8671// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8672// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8673// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16> 8674// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]] 8675// CHECK: ret <8 x i16> [[ADD_I_I]] 8676uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { 8677 return vabal_high_u8(a, b, c); 8678} 8679// CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { 8680// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8681// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8682// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8683// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8684// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 
8685// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8686// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4 8687// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8> 8688// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8689// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8690// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]] 8691// CHECK: ret <4 x i32> [[ADD_I_I]] 8692uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { 8693 return vabal_high_u16(a, b, c); 8694} 8695// CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { 8696// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8697// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8698// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8699// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8700// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8701// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8702// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4 8703// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8> 8704// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8705// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8706// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]] 8707// CHECK: ret <2 x i64> [[ADD_I_I]] 8708uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { 8709 return vabal_high_u32(a, b, c); 8710} 8711 8712// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 { 8713// CHECK: [[VMULL_I:%.*]] = 
call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 8714// CHECK: ret <8 x i16> [[VMULL_I]] 8715int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { 8716 return vmull_s8(a, b); 8717} 8718// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { 8719// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8720// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8721// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8722// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8723// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 8724// CHECK: ret <4 x i32> [[VMULL2_I]] 8725int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { 8726 return vmull_s16(a, b); 8727} 8728// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { 8729// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8730// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8731// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8732// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8733// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 8734// CHECK: ret <2 x i64> [[VMULL2_I]] 8735int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { 8736 return vmull_s32(a, b); 8737} 8738// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 { 8739// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4 8740// CHECK: ret <8 x i16> [[VMULL_I]] 8741uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { 8742 return vmull_u8(a, b); 8743} 8744// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 { 8745// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8746// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8747// CHECK: [[VMULL_I:%.*]] = 
bitcast <8 x i8> [[TMP0]] to <4 x i16> 8748// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8749// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 8750// CHECK: ret <4 x i32> [[VMULL2_I]] 8751uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { 8752 return vmull_u16(a, b); 8753} 8754// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 { 8755// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8756// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8757// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8758// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8759// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 8760// CHECK: ret <2 x i64> [[VMULL2_I]] 8761uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { 8762 return vmull_u32(a, b); 8763} 8764 8765// CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 { 8766// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8767// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8768// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8769// CHECK: ret <8 x i16> [[VMULL_I_I]] 8770int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) { 8771 return vmull_high_s8(a, b); 8772} 8773// CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 { 8774// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8775// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 
8776// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8777// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8778// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8779// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8780// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 8781// CHECK: ret <4 x i32> [[VMULL2_I_I]] 8782int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) { 8783 return vmull_high_s16(a, b); 8784} 8785// CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 { 8786// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 8787// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8788// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8789// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8790// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8791// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8792// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 8793// CHECK: ret <2 x i64> [[VMULL2_I_I]] 8794int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) { 8795 return vmull_high_s32(a, b); 8796} 8797// CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 { 8798// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8799// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8800// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8801// CHECK: 
ret <8 x i16> [[VMULL_I_I]] 8802uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) { 8803 return vmull_high_u8(a, b); 8804} 8805// CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 { 8806// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8807// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8808// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8809// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8810// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8811// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8812// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 8813// CHECK: ret <4 x i32> [[VMULL2_I_I]] 8814uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) { 8815 return vmull_high_u16(a, b); 8816} 8817// CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 { 8818// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 8819// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8820// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8821// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8822// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8823// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8824// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 8825// CHECK: ret <2 x i64> [[VMULL2_I_I]] 8826uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) { 8827 return vmull_high_u32(a, b); 8828} 8829 8830// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> 
%b, <8 x i8> %c) #0 { 8831// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 8832// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] 8833// CHECK: ret <8 x i16> [[ADD_I]] 8834int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { 8835 return vmlal_s8(a, b, c); 8836} 8837// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 8838// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8839// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8840// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8841// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8842// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 8843// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] 8844// CHECK: ret <4 x i32> [[ADD_I]] 8845int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 8846 return vmlal_s16(a, b, c); 8847} 8848// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 8849// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8850// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8851// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8852// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8853// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 8854// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] 8855// CHECK: ret <2 x i64> [[ADD_I]] 8856int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 8857 return vmlal_s32(a, b, c); 8858} 8859// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 8860// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4 8861// CHECK: [[ADD_I:%.*]] 
= add <8 x i16> %a, [[VMULL_I_I]] 8862// CHECK: ret <8 x i16> [[ADD_I]] 8863uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { 8864 return vmlal_u8(a, b, c); 8865} 8866// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 8867// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8868// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8869// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8870// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8871// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 8872// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] 8873// CHECK: ret <4 x i32> [[ADD_I]] 8874uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { 8875 return vmlal_u16(a, b, c); 8876} 8877// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 8878// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8879// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8880// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8881// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8882// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 8883// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] 8884// CHECK: ret <2 x i64> [[ADD_I]] 8885uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { 8886 return vmlal_u32(a, b, c); 8887} 8888 8889// CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { 8890// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8891// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, 
i32 12, i32 13, i32 14, i32 15> 8892// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8893// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]] 8894// CHECK: ret <8 x i16> [[ADD_I_I]] 8895int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { 8896 return vmlal_high_s8(a, b, c); 8897} 8898// CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { 8899// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8900// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8901// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8902// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8903// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8904// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8905// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4 8906// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] 8907// CHECK: ret <4 x i32> [[ADD_I_I]] 8908int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { 8909 return vmlal_high_s16(a, b, c); 8910} 8911// CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 { 8912// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8913// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8914// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8915// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8916// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8917// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> 
[[TMP1]] to <2 x i32> 8918// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4 8919// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] 8920// CHECK: ret <2 x i64> [[ADD_I_I]] 8921int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { 8922 return vmlal_high_s32(a, b, c); 8923} 8924// CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 { 8925// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8926// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8927// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4 8928// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]] 8929// CHECK: ret <8 x i16> [[ADD_I_I]] 8930uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { 8931 return vmlal_high_u8(a, b, c); 8932} 8933// CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 { 8934// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8935// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8936// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8937// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8938// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8939// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8940// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4 8941// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, 
// NOTE(review): this chunk opens mid-way through the autogenerated CHECK
// block for test_vmlal_high_u16; the fragment below is the tail of a CHECK
// line whose beginning lies outside this view — confirm against the full
// file: [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}

// Widening multiply-subtract (vmlsl_*): smull/umull of the narrow sources,
// then a vector sub from the wide accumulator.
// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

// vmlsl_high_*: same operation on the high halves of the 128-bit sources
// (extracted by the shufflevector of lanes n/2 .. n-1).
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}

// Saturating doubling widening multiply: vqdmull_* lowers to
// llvm.aarch64.neon.sqdmull.
// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

// vqdmlal/vqdmlsl: sqdmull followed by a saturating add/sub with the
// accumulator (sqadd / sqsub).
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}

// vqdmlal_high/vqdmlsl_high: high-half sqdmull with a saturating
// accumulate (sqadd / sqsub).
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}

// Polynomial multiply: vmull_p8 / vmull_high_p8 lower to pmull.
// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}

// Scalar add/sub intrinsics lower to plain IR add/sub.
// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}

// Scalar saturating add: the 8/16-bit variants round-trip through a
// one-lane vector (insertelement / intrinsic / extractelement).
// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}

// Scalar saturating sub (sqsub / uqsub), same one-lane-vector pattern for
// the 8/16-bit variants.
// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}

// Scalar shifts: plain (sshl/ushl), saturating (sqshl/uqshl), rounding
// (srshl/urshl), and saturating-rounding (sqrshl/uqrshl).
// CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, uint64_t b) {
  return vshld_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}


// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// NOTE(review): test_vpadds_f32 continues past the end of this chunk; only
// its CHECK-LABEL line is visible here.
// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK: ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK: ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}

// Floating-point pairwise max/min reductions: vpmaxnm*/vpminnm* use the
// IEEE maxNum/minNum semantics (fmaxnmv/fminnmv); vpmax*/vpmin* use
// fmaxv/fminv.
// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}

// Scalar saturating doubling multiply-high (vqdmulh*) and its rounding
// variant (vqrdmulh*); i16 versions go through vector lane 0.
// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t
test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}

// Multiply-extended (vmulx*): scalar f32/f64 forms and the <1 x double>
// vector form, which bitcasts through <8 x i8>.
// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
// CHECK: ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}

// Newton-Raphson step intrinsics: frecps (reciprocal step) and frsqrts
// (reciprocal square-root step).
// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
// CHECK: ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
// CHECK: ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
// CHECK: ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
// CHECK: ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}

// Scalar integer-to-float conversions lower to plain sitofp/uitofp.
// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}

// Estimate intrinsics: frecpe (reciprocal), frecpx (reciprocal exponent)
// and ursqrte (unsigned reciprocal square-root estimate).
// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
// CHECK: ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
// CHECK: ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
// CHECK: ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
// CHECK: ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t
a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
// CHECK: ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
// CHECK: ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}

// vld1/vld1q single-vector loads: each lowers to a plain LLVM load
// through a pointer casted to the vector type (no intrinsic call).
// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
// CHECK: ret <8 x half> [[TMP3]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK: ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK: ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
// CHECK: ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK: ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}

// vld2q structure loads: lower to the aarch64.neon.ld2 intrinsic, a store
// of the returned { vec, vec } aggregate into a local, and a memcpy into
// the sret-style return slot.
// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x2_t [[TMP5]]
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
  return vld2q_u64(a);
}

// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x2_t [[TMP5]]
int8x16x2_t test_vld2q_s8(int8_t const *a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld2q_s16(int16_t const *a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld2q_s32(int32_t const *a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}

// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load
%struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 10290// CHECK: ret %struct.float32x4x2_t [[TMP6]] 10291float32x4x2_t test_vld2q_f32(float32_t const *a) { 10292 return vld2q_f32(a); 10293} 10294 10295// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 { 10296// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 10297// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 10298// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* 10299// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 10300// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>* 10301// CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]]) 10302// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }* 10303// CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]] 10304// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* 10305// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* 10306// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 10307// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 10308// CHECK: ret %struct.float64x2x2_t [[TMP6]] 10309float64x2x2_t test_vld2q_f64(float64_t const *a) { 10310 return vld2q_f64(a); 10311} 10312 10313// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 { 10314// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 10315// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 10316// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* 10317// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 10318// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 10319// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x 
i8>, <16 x i8> }* 10320// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]] 10321// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* 10322// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* 10323// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) 10324// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 10325// CHECK: ret %struct.poly8x16x2_t [[TMP5]] 10326poly8x16x2_t test_vld2q_p8(poly8_t const *a) { 10327 return vld2q_p8(a); 10328} 10329 10330// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 { 10331// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 10332// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 10333// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 10334// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10335// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 10336// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 10337// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 10338// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] 10339// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* 10340// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 10341// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 10342// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 10343// CHECK: ret %struct.poly16x8x2_t [[TMP6]] 10344poly16x8x2_t test_vld2q_p16(poly16_t const *a) { 10345 return vld2q_p16(a); 10346} 10347 10348// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 { 10349// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 10350// CHECK: [[__RET:%.*]] = alloca 
%struct.uint8x8x2_t, align 8 10351// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 10352// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 10353// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 10354// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 10355// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]] 10356// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* 10357// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 10358// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) 10359// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 10360// CHECK: ret %struct.uint8x8x2_t [[TMP5]] 10361uint8x8x2_t test_vld2_u8(uint8_t const *a) { 10362 return vld2_u8(a); 10363} 10364 10365// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 { 10366// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 10367// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 10368// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 10369// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10370// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10371// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 10372// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 10373// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 10374// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* 10375// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 10376// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10377// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 10378// 
CHECK: ret %struct.uint16x4x2_t [[TMP6]] 10379uint16x4x2_t test_vld2_u16(uint16_t const *a) { 10380 return vld2_u16(a); 10381} 10382 10383// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 { 10384// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 10385// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 10386// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 10387// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 10388// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 10389// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 10390// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* 10391// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] 10392// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* 10393// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 10394// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10395// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 10396// CHECK: ret %struct.uint32x2x2_t [[TMP6]] 10397uint32x2x2_t test_vld2_u32(uint32_t const *a) { 10398 return vld2_u32(a); 10399} 10400 10401// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 { 10402// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 10403// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 10404// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* 10405// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 10406// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 10407// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 10408// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* 10409// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { 
<1 x i64>, <1 x i64> }* [[TMP3]] 10410// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* 10411// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* 10412// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10413// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 10414// CHECK: ret %struct.uint64x1x2_t [[TMP6]] 10415uint64x1x2_t test_vld2_u64(uint64_t const *a) { 10416 return vld2_u64(a); 10417} 10418 10419// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 { 10420// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 10421// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 10422// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 10423// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 10424// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 10425// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 10426// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]] 10427// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* 10428// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 10429// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) 10430// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 10431// CHECK: ret %struct.int8x8x2_t [[TMP5]] 10432int8x8x2_t test_vld2_s8(int8_t const *a) { 10433 return vld2_s8(a); 10434} 10435 10436// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 { 10437// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 10438// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 10439// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 10440// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10441// CHECK: 
[[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10442// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 10443// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 10444// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 10445// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* 10446// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 10447// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10448// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 10449// CHECK: ret %struct.int16x4x2_t [[TMP6]] 10450int16x4x2_t test_vld2_s16(int16_t const *a) { 10451 return vld2_s16(a); 10452} 10453 10454// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 { 10455// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 10456// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 10457// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 10458// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 10459// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 10460// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 10461// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* 10462// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] 10463// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* 10464// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 10465// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10466// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 10467// CHECK: ret %struct.int32x2x2_t [[TMP6]] 10468int32x2x2_t test_vld2_s32(int32_t const *a) { 10469 return 
vld2_s32(a); 10470} 10471 10472// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 { 10473// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 10474// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 10475// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* 10476// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 10477// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 10478// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 10479// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* 10480// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] 10481// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* 10482// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* 10483// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10484// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 10485// CHECK: ret %struct.int64x1x2_t [[TMP6]] 10486int64x1x2_t test_vld2_s64(int64_t const *a) { 10487 return vld2_s64(a); 10488} 10489 10490// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 { 10491// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 10492// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 10493// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 10494// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 10495// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10496// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 10497// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 10498// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 10499// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* 
10500// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 10501// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10502// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 10503// CHECK: ret %struct.float16x4x2_t [[TMP6]] 10504float16x4x2_t test_vld2_f16(float16_t const *a) { 10505 return vld2_f16(a); 10506} 10507 10508// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 { 10509// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 10510// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 10511// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 10512// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 10513// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>* 10514// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]]) 10515// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* 10516// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]] 10517// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* 10518// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 10519// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10520// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 10521// CHECK: ret %struct.float32x2x2_t [[TMP6]] 10522float32x2x2_t test_vld2_f32(float32_t const *a) { 10523 return vld2_f32(a); 10524} 10525 10526// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 { 10527// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 10528// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 10529// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* 10530// CHECK: [[TMP1:%.*]] = bitcast 
double* %a to i8* 10531// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>* 10532// CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]]) 10533// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }* 10534// CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]] 10535// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* 10536// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* 10537// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10538// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 10539// CHECK: ret %struct.float64x1x2_t [[TMP6]] 10540float64x1x2_t test_vld2_f64(float64_t const *a) { 10541 return vld2_f64(a); 10542} 10543 10544// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 { 10545// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 10546// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 10547// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 10548// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 10549// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 10550// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 10551// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]] 10552// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* 10553// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 10554// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) 10555// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 10556// CHECK: ret %struct.poly8x8x2_t [[TMP5]] 10557poly8x8x2_t test_vld2_p8(poly8_t const *a) { 10558 return 
vld2_p8(a); 10559} 10560 10561// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 { 10562// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 10563// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 10564// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 10565// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10566// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10567// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 10568// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 10569// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 10570// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* 10571// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 10572// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10573// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 10574// CHECK: ret %struct.poly16x4x2_t [[TMP6]] 10575poly16x4x2_t test_vld2_p16(poly16_t const *a) { 10576 return vld2_p16(a); 10577} 10578 10579// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 { 10580// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 10581// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 10582// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 10583// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 10584// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 10585// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 10586// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 10587// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* 10588// 
CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 10589// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) 10590// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 10591// CHECK: ret %struct.uint8x16x3_t [[TMP5]] 10592uint8x16x3_t test_vld3q_u8(uint8_t const *a) { 10593 return vld3q_u8(a); 10594} 10595 10596// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 { 10597// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 10598// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 10599// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 10600// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10601// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 10602// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 10603// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 10604// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 10605// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* 10606// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 10607// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10608// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 10609// CHECK: ret %struct.uint16x8x3_t [[TMP6]] 10610uint16x8x3_t test_vld3q_u16(uint16_t const *a) { 10611 return vld3q_u16(a); 10612} 10613 10614// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 { 10615// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 10616// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 10617// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 10618// CHECK: [[TMP1:%.*]] = bitcast 
i32* %a to i8* 10619// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 10620// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 10621// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 10622// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 10623// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* 10624// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 10625// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10626// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 10627// CHECK: ret %struct.uint32x4x3_t [[TMP6]] 10628uint32x4x3_t test_vld3q_u32(uint32_t const *a) { 10629 return vld3q_u32(a); 10630} 10631 10632// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 { 10633// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 10634// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 10635// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* 10636// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 10637// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>* 10638// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]]) 10639// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 10640// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 10641// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* 10642// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* 10643// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10644// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, 
%struct.uint64x2x3_t* [[RETVAL]], align 16 10645// CHECK: ret %struct.uint64x2x3_t [[TMP6]] 10646uint64x2x3_t test_vld3q_u64(uint64_t const *a) { 10647 return vld3q_u64(a); 10648} 10649 10650// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 { 10651// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 10652// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 10653// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 10654// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 10655// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 10656// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 10657// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 10658// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* 10659// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 10660// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) 10661// CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 10662// CHECK: ret %struct.int8x16x3_t [[TMP5]] 10663int8x16x3_t test_vld3q_s8(int8_t const *a) { 10664 return vld3q_s8(a); 10665} 10666 10667// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 { 10668// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 10669// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 10670// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 10671// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10672// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 10673// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 10674// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 10675// 
CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 10676// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* 10677// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 10678// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10679// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 10680// CHECK: ret %struct.int16x8x3_t [[TMP6]] 10681int16x8x3_t test_vld3q_s16(int16_t const *a) { 10682 return vld3q_s16(a); 10683} 10684 10685// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 { 10686// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 10687// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 10688// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 10689// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 10690// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 10691// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 10692// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 10693// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 10694// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* 10695// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 10696// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10697// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 10698// CHECK: ret %struct.int32x4x3_t [[TMP6]] 10699int32x4x3_t test_vld3q_s32(int32_t const *a) { 10700 return vld3q_s32(a); 10701} 10702 10703// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 { 10704// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 10705// 
CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 10706// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* 10707// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 10708// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>* 10709// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]]) 10710// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 10711// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 10712// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* 10713// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* 10714// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10715// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 10716// CHECK: ret %struct.int64x2x3_t [[TMP6]] 10717int64x2x3_t test_vld3q_s64(int64_t const *a) { 10718 return vld3q_s64(a); 10719} 10720 10721// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 { 10722// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 10723// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 10724// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 10725// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 10726// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 10727// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 10728// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 10729// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 10730// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* 10731// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 
10732// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10733// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 10734// CHECK: ret %struct.float16x8x3_t [[TMP6]] 10735float16x8x3_t test_vld3q_f16(float16_t const *a) { 10736 return vld3q_f16(a); 10737} 10738 10739// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 { 10740// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 10741// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 10742// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 10743// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 10744// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>* 10745// CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]]) 10746// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }* 10747// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 10748// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* 10749// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 10750// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10751// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 10752// CHECK: ret %struct.float32x4x3_t [[TMP6]] 10753float32x4x3_t test_vld3q_f32(float32_t const *a) { 10754 return vld3q_f32(a); 10755} 10756 10757// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 { 10758// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 10759// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 10760// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* 10761// CHECK: [[TMP1:%.*]] = bitcast double* %a 
to i8* 10762// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>* 10763// CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]]) 10764// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }* 10765// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] 10766// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* 10767// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* 10768// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10769// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 10770// CHECK: ret %struct.float64x2x3_t [[TMP6]] 10771float64x2x3_t test_vld3q_f64(float64_t const *a) { 10772 return vld3q_f64(a); 10773} 10774 10775// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 { 10776// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 10777// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 10778// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 10779// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 10780// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 10781// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 10782// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 10783// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* 10784// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 10785// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) 10786// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], 
align 16 10787// CHECK: ret %struct.poly8x16x3_t [[TMP5]] 10788poly8x16x3_t test_vld3q_p8(poly8_t const *a) { 10789 return vld3q_p8(a); 10790} 10791 10792// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 { 10793// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 10794// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 10795// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 10796// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10797// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 10798// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 10799// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 10800// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 10801// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* 10802// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 10803// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10804// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 10805// CHECK: ret %struct.poly16x8x3_t [[TMP6]] 10806poly16x8x3_t test_vld3q_p16(poly16_t const *a) { 10807 return vld3q_p16(a); 10808} 10809 10810// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 { 10811// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 10812// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 10813// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 10814// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 10815// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 10816// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 10817// CHECK: store { <8 x i8>, <8 x i8>, <8 
x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 10818// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* 10819// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 10820// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false) 10821// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 10822// CHECK: ret %struct.uint8x8x3_t [[TMP5]] 10823uint8x8x3_t test_vld3_u8(uint8_t const *a) { 10824 return vld3_u8(a); 10825} 10826 10827// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 { 10828// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 10829// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 10830// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 10831// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10832// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10833// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 10834// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 10835// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 10836// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* 10837// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 10838// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10839// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 10840// CHECK: ret %struct.uint16x4x3_t [[TMP6]] 10841uint16x4x3_t test_vld3_u16(uint16_t const *a) { 10842 return vld3_u16(a); 10843} 10844 10845// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 { 10846// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 10847// CHECK: [[__RET:%.*]] = alloca 
%struct.uint32x2x3_t, align 8 10848// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 10849// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 10850// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 10851// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 10852// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 10853// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 10854// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* 10855// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 10856// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10857// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 10858// CHECK: ret %struct.uint32x2x3_t [[TMP6]] 10859uint32x2x3_t test_vld3_u32(uint32_t const *a) { 10860 return vld3_u32(a); 10861} 10862 10863// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 { 10864// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 10865// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 10866// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 10867// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 10868// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 10869// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 10870// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 10871// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 10872// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* 10873// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 10874// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10875// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 10876// CHECK: ret %struct.uint64x1x3_t [[TMP6]] 10877uint64x1x3_t test_vld3_u64(uint64_t const *a) { 10878 return vld3_u64(a); 10879} 10880 10881// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 { 10882// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 10883// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 10884// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 10885// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 10886// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 10887// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 10888// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 10889// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* 10890// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 10891// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false) 10892// CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 10893// CHECK: ret %struct.int8x8x3_t [[TMP5]] 10894int8x8x3_t test_vld3_s8(int8_t const *a) { 10895 return vld3_s8(a); 10896} 10897 10898// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 { 10899// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 10900// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 10901// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 10902// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10903// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10904// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 
10905// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 10906// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 10907// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* 10908// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 10909// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10910// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 10911// CHECK: ret %struct.int16x4x3_t [[TMP6]] 10912int16x4x3_t test_vld3_s16(int16_t const *a) { 10913 return vld3_s16(a); 10914} 10915 10916// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 { 10917// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 10918// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 10919// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 10920// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 10921// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 10922// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 10923// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 10924// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 10925// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* 10926// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 10927// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10928// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 10929// CHECK: ret %struct.int32x2x3_t [[TMP6]] 10930int32x2x3_t test_vld3_s32(int32_t const *a) { 10931 return vld3_s32(a); 10932} 10933 10934// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* 
%a) #0 { 10935// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 10936// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 10937// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 10938// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 10939// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 10940// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 10941// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 10942// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 10943// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* 10944// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 10945// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10946// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 10947// CHECK: ret %struct.int64x1x3_t [[TMP6]] 10948int64x1x3_t test_vld3_s64(int64_t const *a) { 10949 return vld3_s64(a); 10950} 10951 10952// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 { 10953// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 10954// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 10955// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 10956// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 10957// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 10958// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 10959// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 10960// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 10961// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* 10962// 
CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 10963// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10964// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 10965// CHECK: ret %struct.float16x4x3_t [[TMP6]] 10966float16x4x3_t test_vld3_f16(float16_t const *a) { 10967 return vld3_f16(a); 10968} 10969 10970// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 { 10971// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 10972// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 10973// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 10974// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 10975// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>* 10976// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]]) 10977// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* 10978// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 10979// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* 10980// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 10981// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 10982// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 10983// CHECK: ret %struct.float32x2x3_t [[TMP6]] 10984float32x2x3_t test_vld3_f32(float32_t const *a) { 10985 return vld3_f32(a); 10986} 10987 10988// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 { 10989// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 10990// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 10991// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] 
to i8* 10992// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 10993// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>* 10994// CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]]) 10995// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }* 10996// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 10997// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* 10998// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 10999// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 11000// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 11001// CHECK: ret %struct.float64x1x3_t [[TMP6]] 11002float64x1x3_t test_vld3_f64(float64_t const *a) { 11003 return vld3_f64(a); 11004} 11005 11006// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 { 11007// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 11008// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 11009// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 11010// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 11011// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 11012// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 11013// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 11014// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* 11015// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 11016// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false) 11017// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, 
%struct.poly8x8x3_t* [[RETVAL]], align 8 11018// CHECK: ret %struct.poly8x8x3_t [[TMP5]] 11019poly8x8x3_t test_vld3_p8(poly8_t const *a) { 11020 return vld3_p8(a); 11021} 11022 11023// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 { 11024// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 11025// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 11026// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 11027// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11028// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11029// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11030// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 11031// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11032// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* 11033// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 11034// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 11035// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 11036// CHECK: ret %struct.poly16x4x3_t [[TMP6]] 11037poly16x4x3_t test_vld3_p16(poly16_t const *a) { 11038 return vld3_p16(a); 11039} 11040 11041// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 { 11042// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 11043// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 11044// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 11045// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 11046// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 11047// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x 
i8>, <16 x i8> }* 11048// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 11049// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* 11050// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 11051// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) 11052// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 11053// CHECK: ret %struct.uint8x16x4_t [[TMP5]] 11054uint8x16x4_t test_vld4q_u8(uint8_t const *a) { 11055 return vld4q_u8(a); 11056} 11057 11058// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 { 11059// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 11060// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 11061// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 11062// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11063// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11064// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11065// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11066// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11067// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* 11068// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 11069// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11070// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 11071// CHECK: ret %struct.uint16x8x4_t [[TMP6]] 11072uint16x8x4_t test_vld4q_u16(uint16_t const *a) { 11073 return vld4q_u16(a); 11074} 11075 11076// CHECK-LABEL: define %struct.uint32x4x4_t 
@test_vld4q_u32(i32* %a) #0 { 11077// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 11078// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 11079// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 11080// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11081// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 11082// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 11083// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 11084// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 11085// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* 11086// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 11087// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11088// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 11089// CHECK: ret %struct.uint32x4x4_t [[TMP6]] 11090uint32x4x4_t test_vld4q_u32(uint32_t const *a) { 11091 return vld4q_u32(a); 11092} 11093 11094// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 { 11095// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 11096// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 11097// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 11098// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11099// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>* 11100// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]]) 11101// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 11102// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x 
i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 11103// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* 11104// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 11105// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11106// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 11107// CHECK: ret %struct.uint64x2x4_t [[TMP6]] 11108uint64x2x4_t test_vld4q_u64(uint64_t const *a) { 11109 return vld4q_u64(a); 11110} 11111 11112// CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 { 11113// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 11114// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 11115// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 11116// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 11117// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 11118// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 11119// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 11120// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* 11121// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 11122// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) 11123// CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 11124// CHECK: ret %struct.int8x16x4_t [[TMP5]] 11125int8x16x4_t test_vld4q_s8(int8_t const *a) { 11126 return vld4q_s8(a); 11127} 11128 11129// CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 { 11130// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 11131// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 11132// CHECK: 
[[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 11133// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11134// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11135// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11136// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11137// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11138// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* 11139// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 11140// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11141// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 11142// CHECK: ret %struct.int16x8x4_t [[TMP6]] 11143int16x8x4_t test_vld4q_s16(int16_t const *a) { 11144 return vld4q_s16(a); 11145} 11146 11147// CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 { 11148// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 11149// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 11150// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 11151// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11152// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 11153// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 11154// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 11155// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 11156// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* 11157// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 11158// 
CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11159// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 11160// CHECK: ret %struct.int32x4x4_t [[TMP6]] 11161int32x4x4_t test_vld4q_s32(int32_t const *a) { 11162 return vld4q_s32(a); 11163} 11164 11165// CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 { 11166// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 11167// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 11168// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 11169// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11170// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>* 11171// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]]) 11172// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 11173// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 11174// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* 11175// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 11176// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11177// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 11178// CHECK: ret %struct.int64x2x4_t [[TMP6]] 11179int64x2x4_t test_vld4q_s64(int64_t const *a) { 11180 return vld4q_s64(a); 11181} 11182 11183// CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 { 11184// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 11185// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 11186// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 11187// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 11188// CHECK: 
[[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11189// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11190// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11191// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11192// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* 11193// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 11194// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11195// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 11196// CHECK: ret %struct.float16x8x4_t [[TMP6]] 11197float16x8x4_t test_vld4q_f16(float16_t const *a) { 11198 return vld4q_f16(a); 11199} 11200 11201// CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 { 11202// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 11203// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 11204// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 11205// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 11206// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>* 11207// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]]) 11208// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 11209// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 11210// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* 11211// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 11212// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* 
[[TMP5]], i64 64, i32 16, i1 false) 11213// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 11214// CHECK: ret %struct.float32x4x4_t [[TMP6]] 11215float32x4x4_t test_vld4q_f32(float32_t const *a) { 11216 return vld4q_f32(a); 11217} 11218 11219// CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 { 11220// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 11221// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 11222// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 11223// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 11224// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>* 11225// CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]]) 11226// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 11227// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] 11228// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* 11229// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 11230// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11231// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 11232// CHECK: ret %struct.float64x2x4_t [[TMP6]] 11233float64x2x4_t test_vld4q_f64(float64_t const *a) { 11234 return vld4q_f64(a); 11235} 11236 11237// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 { 11238// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 11239// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 11240// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 11241// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x 
i8>* 11242// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 11243// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 11244// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 11245// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* 11246// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 11247// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) 11248// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 11249// CHECK: ret %struct.poly8x16x4_t [[TMP5]] 11250poly8x16x4_t test_vld4q_p8(poly8_t const *a) { 11251 return vld4q_p8(a); 11252} 11253 11254// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 { 11255// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 11256// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 11257// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 11258// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11259// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11260// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11261// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11262// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11263// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* 11264// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 11265// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11266// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, 
%struct.poly16x8x4_t* [[RETVAL]], align 16 11267// CHECK: ret %struct.poly16x8x4_t [[TMP6]] 11268poly16x8x4_t test_vld4q_p16(poly16_t const *a) { 11269 return vld4q_p16(a); 11270} 11271 11272// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 { 11273// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 11274// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 11275// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 11276// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 11277// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 11278// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 11279// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 11280// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* 11281// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 11282// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) 11283// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 11284// CHECK: ret %struct.uint8x8x4_t [[TMP5]] 11285uint8x8x4_t test_vld4_u8(uint8_t const *a) { 11286 return vld4_u8(a); 11287} 11288 11289// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 { 11290// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 11291// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 11292// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 11293// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11294// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11295// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11296// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 
x i16>, <4 x i16>, <4 x i16> }* 11297// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11298// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* 11299// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 11300// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11301// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 11302// CHECK: ret %struct.uint16x4x4_t [[TMP6]] 11303uint16x4x4_t test_vld4_u16(uint16_t const *a) { 11304 return vld4_u16(a); 11305} 11306 11307// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 { 11308// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 11309// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 11310// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 11311// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11312// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 11313// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 11314// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 11315// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 11316// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* 11317// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 11318// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11319// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 11320// CHECK: ret %struct.uint32x2x4_t [[TMP6]] 11321uint32x2x4_t test_vld4_u32(uint32_t const *a) { 11322 return vld4_u32(a); 11323} 11324 11325// CHECK-LABEL: define 
%struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 { 11326// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 11327// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 11328// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 11329// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11330// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 11331// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 11332// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 11333// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 11334// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* 11335// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 11336// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11337// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 11338// CHECK: ret %struct.uint64x1x4_t [[TMP6]] 11339uint64x1x4_t test_vld4_u64(uint64_t const *a) { 11340 return vld4_u64(a); 11341} 11342 11343// CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 { 11344// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 11345// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 11346// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 11347// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 11348// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 11349// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 11350// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 11351// CHECK: [[TMP3:%.*]] = bitcast 
%struct.int8x8x4_t* [[RETVAL]] to i8* 11352// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 11353// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) 11354// CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 11355// CHECK: ret %struct.int8x8x4_t [[TMP5]] 11356int8x8x4_t test_vld4_s8(int8_t const *a) { 11357 return vld4_s8(a); 11358} 11359 11360// CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 { 11361// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 11362// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 11363// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 11364// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11365// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11366// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11367// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 11368// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11369// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* 11370// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 11371// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11372// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 11373// CHECK: ret %struct.int16x4x4_t [[TMP6]] 11374int16x4x4_t test_vld4_s16(int16_t const *a) { 11375 return vld4_s16(a); 11376} 11377 11378// CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 { 11379// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 11380// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 11381// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* 
[[__RET]] to i8* 11382// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11383// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 11384// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 11385// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 11386// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 11387// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* 11388// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 11389// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11390// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 11391// CHECK: ret %struct.int32x2x4_t [[TMP6]] 11392int32x2x4_t test_vld4_s32(int32_t const *a) { 11393 return vld4_s32(a); 11394} 11395 11396// CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 { 11397// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 11398// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 11399// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 11400// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11401// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 11402// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 11403// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 11404// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 11405// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* 11406// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 11407// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
[[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11408// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 11409// CHECK: ret %struct.int64x1x4_t [[TMP6]] 11410int64x1x4_t test_vld4_s64(int64_t const *a) { 11411 return vld4_s64(a); 11412} 11413 11414// CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 { 11415// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 11416// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 11417// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 11418// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 11419// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11420// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11421// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 11422// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11423// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* 11424// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 11425// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11426// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 11427// CHECK: ret %struct.float16x4x4_t [[TMP6]] 11428float16x4x4_t test_vld4_f16(float16_t const *a) { 11429 return vld4_f16(a); 11430} 11431 11432// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 { 11433// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 11434// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 11435// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 11436// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 11437// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x 
float>* 11438// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]]) 11439// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 11440// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 11441// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* 11442// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 11443// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11444// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 11445// CHECK: ret %struct.float32x2x4_t [[TMP6]] 11446float32x2x4_t test_vld4_f32(float32_t const *a) { 11447 return vld4_f32(a); 11448} 11449 11450// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 { 11451// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 11452// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 11453// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 11454// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 11455// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>* 11456// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]]) 11457// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* 11458// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 11459// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* 11460// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 11461// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], 
i8* [[TMP5]], i64 32, i32 8, i1 false) 11462// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 11463// CHECK: ret %struct.float64x1x4_t [[TMP6]] 11464float64x1x4_t test_vld4_f64(float64_t const *a) { 11465 return vld4_f64(a); 11466} 11467 11468// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 { 11469// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 11470// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 11471// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 11472// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 11473// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 11474// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 11475// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 11476// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* 11477// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 11478// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) 11479// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 11480// CHECK: ret %struct.poly8x8x4_t [[TMP5]] 11481poly8x8x4_t test_vld4_p8(poly8_t const *a) { 11482 return vld4_p8(a); 11483} 11484 11485// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 { 11486// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 11487// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 11488// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 11489// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11490// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11491// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } 
@llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11492// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 11493// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11494// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* 11495// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 11496// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11497// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 11498// CHECK: ret %struct.poly16x4x4_t [[TMP6]] 11499poly16x4x4_t test_vld4_p16(poly16_t const *a) { 11500 return vld4_p16(a); 11501} 11502 11503// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 { 11504// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* 11505// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]] 11506// CHECK: ret void 11507void test_vst1q_u8(uint8_t *a, uint8x16_t b) { 11508 vst1q_u8(a, b); 11509} 11510 11511// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 { 11512// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 11513// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11514// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 11515// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11516// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]] 11517// CHECK: ret void 11518void test_vst1q_u16(uint16_t *a, uint16x8_t b) { 11519 vst1q_u16(a, b); 11520} 11521 11522// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 { 11523// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 11524// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 11525// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 11526// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 11527// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]] 11528// 
CHECK: ret void 11529void test_vst1q_u32(uint32_t *a, uint32x4_t b) { 11530 vst1q_u32(a, b); 11531} 11532 11533// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 { 11534// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 11535// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 11536// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* 11537// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 11538// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]] 11539// CHECK: ret void 11540void test_vst1q_u64(uint64_t *a, uint64x2_t b) { 11541 vst1q_u64(a, b); 11542} 11543 11544// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 { 11545// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* 11546// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]] 11547// CHECK: ret void 11548void test_vst1q_s8(int8_t *a, int8x16_t b) { 11549 vst1q_s8(a, b); 11550} 11551 11552// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 { 11553// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 11554// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11555// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 11556// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11557// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]] 11558// CHECK: ret void 11559void test_vst1q_s16(int16_t *a, int16x8_t b) { 11560 vst1q_s16(a, b); 11561} 11562 11563// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 { 11564// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 11565// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 11566// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 11567// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 11568// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]] 11569// CHECK: ret void 11570void test_vst1q_s32(int32_t *a, int32x4_t b) { 11571 vst1q_s32(a, b); 11572} 11573 11574// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 { 11575// 
CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 11576// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 11577// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>* 11578// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 11579// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]] 11580// CHECK: ret void 11581void test_vst1q_s64(int64_t *a, int64x2_t b) { 11582 vst1q_s64(a, b); 11583} 11584 11585// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 { 11586// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 11587// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 11588// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 11589// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11590// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]] 11591// CHECK: ret void 11592void test_vst1q_f16(float16_t *a, float16x8_t b) { 11593 vst1q_f16(a, b); 11594} 11595 11596// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 { 11597// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 11598// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 11599// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* 11600// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 11601// CHECK: store <4 x float> [[TMP3]], <4 x float>* [[TMP2]] 11602// CHECK: ret void 11603void test_vst1q_f32(float32_t *a, float32x4_t b) { 11604 vst1q_f32(a, b); 11605} 11606 11607// CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 { 11608// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 11609// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 11610// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>* 11611// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 11612// CHECK: store <2 x double> [[TMP3]], <2 x double>* [[TMP2]] 11613// CHECK: ret void 11614void test_vst1q_f64(float64_t *a, float64x2_t b) { 11615 vst1q_f64(a, b); 11616} 11617 11618// CHECK-LABEL: 
define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 { 11619// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>* 11620// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]] 11621// CHECK: ret void 11622void test_vst1q_p8(poly8_t *a, poly8x16_t b) { 11623 vst1q_p8(a, b); 11624} 11625 11626// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 { 11627// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 11628// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 11629// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 11630// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 11631// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]] 11632// CHECK: ret void 11633void test_vst1q_p16(poly16_t *a, poly16x8_t b) { 11634 vst1q_p16(a, b); 11635} 11636 11637// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 { 11638// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* 11639// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]] 11640// CHECK: ret void 11641void test_vst1_u8(uint8_t *a, uint8x8_t b) { 11642 vst1_u8(a, b); 11643} 11644 11645// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 { 11646// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 11647// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11648// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 11649// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11650// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]] 11651// CHECK: ret void 11652void test_vst1_u16(uint16_t *a, uint16x4_t b) { 11653 vst1_u16(a, b); 11654} 11655 11656// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 { 11657// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 11658// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11659// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 11660// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11661// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]] 11662// CHECK: ret void 11663void 
test_vst1_u32(uint32_t *a, uint32x2_t b) { 11664 vst1_u32(a, b); 11665} 11666 11667// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 { 11668// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 11669// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 11670// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* 11671// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 11672// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]] 11673// CHECK: ret void 11674void test_vst1_u64(uint64_t *a, uint64x1_t b) { 11675 vst1_u64(a, b); 11676} 11677 11678// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 { 11679// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* 11680// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]] 11681// CHECK: ret void 11682void test_vst1_s8(int8_t *a, int8x8_t b) { 11683 vst1_s8(a, b); 11684} 11685 11686// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 { 11687// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 11688// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11689// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 11690// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11691// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]] 11692// CHECK: ret void 11693void test_vst1_s16(int16_t *a, int16x4_t b) { 11694 vst1_s16(a, b); 11695} 11696 11697// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 { 11698// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 11699// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11700// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 11701// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11702// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]] 11703// CHECK: ret void 11704void test_vst1_s32(int32_t *a, int32x2_t b) { 11705 vst1_s32(a, b); 11706} 11707 11708// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 { 11709// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 11710// 
CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 11711// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>* 11712// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 11713// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]] 11714// CHECK: ret void 11715void test_vst1_s64(int64_t *a, int64x1_t b) { 11716 vst1_s64(a, b); 11717} 11718 11719// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 { 11720// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 11721// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 11722// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 11723// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11724// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]] 11725// CHECK: ret void 11726void test_vst1_f16(float16_t *a, float16x4_t b) { 11727 vst1_f16(a, b); 11728} 11729 11730// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 { 11731// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 11732// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 11733// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* 11734// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 11735// CHECK: store <2 x float> [[TMP3]], <2 x float>* [[TMP2]] 11736// CHECK: ret void 11737void test_vst1_f32(float32_t *a, float32x2_t b) { 11738 vst1_f32(a, b); 11739} 11740 11741// CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 { 11742// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 11743// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 11744// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>* 11745// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 11746// CHECK: store <1 x double> [[TMP3]], <1 x double>* [[TMP2]] 11747// CHECK: ret void 11748void test_vst1_f64(float64_t *a, float64x1_t b) { 11749 vst1_f64(a, b); 11750} 11751 11752// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 { 11753// CHECK: 
[[TMP0:%.*]] = bitcast i8* %a to <8 x i8>* 11754// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]] 11755// CHECK: ret void 11756void test_vst1_p8(poly8_t *a, poly8x8_t b) { 11757 vst1_p8(a, b); 11758} 11759 11760// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 { 11761// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 11762// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11763// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 11764// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11765// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]] 11766// CHECK: ret void 11767void test_vst1_p16(poly16_t *a, poly16x4_t b) { 11768 vst1_p16(a, b); 11769} 11770 11771// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 11772// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 11773// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 11774// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 11775// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 11776// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* 11777// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* 11778// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11779// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 11780// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 11781// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 11782// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 11783// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 11784// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x 
i8>* [[ARRAYIDX2]], align 16 11785// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) 11786// CHECK: ret void 11787void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { 11788 vst2q_u8(a, b); 11789} 11790 11791// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 11792// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 11793// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 11794// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 11795// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 11796// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 11797// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 11798// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11799// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 11800// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 11801// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 11802// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 11803// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 11804// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 11805// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 11806// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 11807// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 11808// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11809// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11810// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> 
[[TMP8]], i8* [[TMP2]]) 11811// CHECK: ret void 11812void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { 11813 vst2q_u16(a, b); 11814} 11815 11816// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 11817// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 11818// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 11819// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 11820// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 11821// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 11822// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 11823// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11824// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 11825// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 11826// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 11827// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 11828// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 11829// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 11830// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 11831// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 11832// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 11833// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 11834// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 11835// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]]) 11836// CHECK: ret void 11837void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) { 11838 
vst2q_u32(a, b); 11839} 11840 11841// CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 11842// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 11843// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 11844// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 11845// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 11846// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* 11847// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* 11848// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11849// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 11850// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 11851// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 11852// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 11853// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 11854// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 11855// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 11856// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 11857// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 11858// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 11859// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 11860// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]]) 11861// CHECK: ret void 11862void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) { 11863 vst2q_u64(a, b); 11864} 11865 11866// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) 
#0 { 11867// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 11868// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 11869// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 11870// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 11871// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* 11872// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* 11873// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11874// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 11875// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 11876// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 11877// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 11878// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 11879// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 11880// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) 11881// CHECK: ret void 11882void test_vst2q_s8(int8_t *a, int8x16x2_t b) { 11883 vst2q_s8(a, b); 11884} 11885 11886// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 11887// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 11888// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 11889// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 11890// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 11891// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 11892// CHECK: [[TMP1:%.*]] = bitcast 
%struct.int16x8x2_t* [[B]] to i8* 11893// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11894// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 11895// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 11896// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 11897// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 11898// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 11899// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 11900// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 11901// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 11902// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 11903// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11904// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11905// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]]) 11906// CHECK: ret void 11907void test_vst2q_s16(int16_t *a, int16x8x2_t b) { 11908 vst2q_s16(a, b); 11909} 11910 11911// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 11912// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 11913// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 11914// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 11915// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 11916// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 11917// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 11918// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 
16, i1 false) 11919// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 11920// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 11921// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 11922// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 11923// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 11924// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 11925// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 11926// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 11927// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 11928// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 11929// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 11930// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]]) 11931// CHECK: ret void 11932void test_vst2q_s32(int32_t *a, int32x4x2_t b) { 11933 vst2q_s32(a, b); 11934} 11935 11936// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 11937// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 11938// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 11939// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 11940// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 11941// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* 11942// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* 11943// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11944// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 11945// CHECK: [[VAL:%.*]] = getelementptr inbounds 
%struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 11946// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 11947// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 11948// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 11949// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 11950// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 11951// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 11952// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 11953// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 11954// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 11955// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]]) 11956// CHECK: ret void 11957void test_vst2q_s64(int64_t *a, int64x2x2_t b) { 11958 vst2q_s64(a, b); 11959} 11960 11961// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { 11962// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 11963// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 11964// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 11965// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 11966// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 11967// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 11968// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11969// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 11970// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 11971// CHECK: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 11972// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 11973// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 11974// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 11975// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 11976// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 11977// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 11978// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11979// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11980// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]]) 11981// CHECK: ret void 11982void test_vst2q_f16(float16_t *a, float16x8x2_t b) { 11983 vst2q_f16(a, b); 11984} 11985 11986// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { 11987// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 11988// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 11989// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 11990// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 11991// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 11992// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 11993// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 11994// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 11995// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 11996// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 
0 11997// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 11998// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 11999// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 12000// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 12001// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 12002// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 12003// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 12004// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 12005// CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]]) 12006// CHECK: ret void 12007void test_vst2q_f32(float32_t *a, float32x4x2_t b) { 12008 vst2q_f32(a, b); 12009} 12010 12011// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { 12012// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 12013// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 12014// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 12015// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 12016// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* 12017// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* 12018// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 12019// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 12020// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 12021// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 12022// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x 
double>* [[ARRAYIDX]], align 16 12023// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 12024// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 12025// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1 12026// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 12027// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 12028// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 12029// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 12030// CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]]) 12031// CHECK: ret void 12032void test_vst2q_f64(float64_t *a, float64x2x2_t b) { 12033 vst2q_f64(a, b); 12034} 12035 12036// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 12037// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 12038// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 12039// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 12040// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 12041// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* 12042// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* 12043// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 12044// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 12045// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 12046// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 12047// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* 
[[__S1]], i32 0, i32 0 12048// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 12049// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 12050// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) 12051// CHECK: ret void 12052void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { 12053 vst2q_p8(a, b); 12054} 12055 12056// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 12057// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 12058// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 12059// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 12060// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 12061// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 12062// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 12063// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 12064// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12065// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 12066// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 12067// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 12068// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 12069// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 12070// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 12071// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 12072// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 12073// CHECK: [[TMP7:%.*]] = bitcast <16 x 
i8> [[TMP4]] to <8 x i16> 12074// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 12075// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]]) 12076// CHECK: ret void 12077void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) { 12078 vst2q_p16(a, b); 12079} 12080 12081// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 12082// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 12083// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 12084// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 12085// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 12086// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 12087// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 12088// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12089// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 12090// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 12091// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 12092// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 12093// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 12094// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 12095// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) 12096// CHECK: ret void 12097void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { 12098 vst2_u8(a, b); 12099} 12100 12101// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 12102// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 12103// 
CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 12104// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 12105// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 12106// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 12107// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 12108// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12109// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12110// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 12111// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 12112// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 12113// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 12114// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 12115// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 12116// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 12117// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 12118// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 12119// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 12120// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]]) 12121// CHECK: ret void 12122void test_vst2_u16(uint16_t *a, uint16x4x2_t b) { 12123 vst2_u16(a, b); 12124} 12125 12126// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 12127// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 12128// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 12129// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds 
%struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 12130// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 12131// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 12132// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 12133// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12134// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 12135// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 12136// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 12137// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 12138// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 12139// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 12140// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 12141// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 12142// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 12143// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 12144// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 12145// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]]) 12146// CHECK: ret void 12147void test_vst2_u32(uint32_t *a, uint32x2x2_t b) { 12148 vst2_u32(a, b); 12149} 12150 12151// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 12152// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 12153// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 12154// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 12155// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x 
i64>]* [[COERCE_DIVE]], align 8 12156// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* 12157// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* 12158// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12159// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 12160// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 12161// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 12162// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 12163// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12164// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 12165// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 12166// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 12167// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12168// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12169// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12170// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]]) 12171// CHECK: ret void 12172void test_vst2_u64(uint64_t *a, uint64x1x2_t b) { 12173 vst2_u64(a, b); 12174} 12175 12176// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 12177// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 12178// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 12179// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 12180// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 12181// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 12182// CHECK: [[TMP1:%.*]] = 
bitcast %struct.int8x8x2_t* [[B]] to i8* 12183// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12184// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 12185// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 12186// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 12187// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 12188// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 12189// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 12190// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) 12191// CHECK: ret void 12192void test_vst2_s8(int8_t *a, int8x8x2_t b) { 12193 vst2_s8(a, b); 12194} 12195 12196// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 12197// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 12198// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 12199// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 12200// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 12201// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 12202// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 12203// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12204// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12205// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 12206// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 12207// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* 
[[ARRAYIDX]], align 8 12208// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 12209// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 12210// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 12211// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 12212// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 12213// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 12214// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 12215// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]]) 12216// CHECK: ret void 12217void test_vst2_s16(int16_t *a, int16x4x2_t b) { 12218 vst2_s16(a, b); 12219} 12220 12221// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 12222// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 12223// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 12224// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 12225// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 12226// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* 12227// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* 12228// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12229// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 12230// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 12231// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 12232// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 12233// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 12234// CHECK: [[VAL1:%.*]] = getelementptr 
inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 12235// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 12236// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 12237// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 12238// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 12239// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 12240// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]]) 12241// CHECK: ret void 12242void test_vst2_s32(int32_t *a, int32x2x2_t b) { 12243 vst2_s32(a, b); 12244} 12245 12246// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 12247// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 12248// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 12249// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 12250// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 12251// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* 12252// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* 12253// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12254// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 12255// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 12256// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 12257// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 12258// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12259// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 12260// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x 
<1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 12261// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 12262// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12263// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12264// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12265// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]]) 12266// CHECK: ret void 12267void test_vst2_s64(int64_t *a, int64x1x2_t b) { 12268 vst2_s64(a, b); 12269} 12270 12271// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { 12272// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 12273// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 12274// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 12275// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 12276// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* 12277// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* 12278// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12279// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 12280// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 12281// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 12282// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 12283// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 12284// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 12285// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 12286// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x 
half>* [[ARRAYIDX2]], align 8 12287// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 12288// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 12289// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 12290// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]]) 12291// CHECK: ret void 12292void test_vst2_f16(float16_t *a, float16x4x2_t b) { 12293 vst2_f16(a, b); 12294} 12295 12296// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { 12297// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 12298// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 12299// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 12300// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 12301// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* 12302// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* 12303// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12304// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 12305// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 12306// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 12307// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 12308// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 12309// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 12310// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 12311// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 12312// CHECK: [[TMP6:%.*]] = bitcast <2 x float> 
[[TMP5]] to <8 x i8> 12313// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 12314// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 12315// CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]]) 12316// CHECK: ret void 12317void test_vst2_f32(float32_t *a, float32x2x2_t b) { 12318 vst2_f32(a, b); 12319} 12320 12321// CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { 12322// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 12323// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 12324// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 12325// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 12326// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* 12327// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* 12328// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12329// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 12330// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 12331// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 12332// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 12333// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 12334// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 12335// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 12336// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 12337// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 12338// CHECK: [[TMP7:%.*]] = bitcast <8 
x i8> [[TMP4]] to <1 x double> 12339// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 12340// CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]]) 12341// CHECK: ret void 12342void test_vst2_f64(float64_t *a, float64x1x2_t b) { 12343 vst2_f64(a, b); 12344} 12345 12346// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 12347// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 12348// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 12349// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 12350// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 12351// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* 12352// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* 12353// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12354// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 12355// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 12356// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 12357// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 12358// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 12359// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 12360// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) 12361// CHECK: ret void 12362void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { 12363 vst2_p8(a, b); 12364} 12365 12366// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 12367// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 
12368// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 12369// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 12370// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 12371// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 12372// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 12373// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 12374// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12375// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 12376// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 12377// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 12378// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 12379// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 12380// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 12381// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 12382// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 12383// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 12384// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 12385// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]]) 12386// CHECK: ret void 12387void test_vst2_p16(poly16_t *a, poly16x4x2_t b) { 12388 vst2_p16(a, b); 12389} 12390 12391// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 12392// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 12393// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 12394// CHECK: [[COERCE_DIVE:%.*]] = getelementptr 
inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 12395// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 12396// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* 12397// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* 12398// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12399// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 12400// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 12401// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 12402// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 12403// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 12404// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 12405// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 12406// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 12407// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 12408// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) 12409// CHECK: ret void 12410void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { 12411 vst3q_u8(a, b); 12412} 12413 12414// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 12415// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 12416// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 12417// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 12418// CHECK: store [3 x <8 x i16>] 
[[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 12419// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* 12420// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* 12421// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12422// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12423// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 12424// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 12425// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 12426// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 12427// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 12428// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 12429// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 12430// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 12431// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 12432// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 12433// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 12434// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 12435// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 12436// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 12437// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 12438// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) 12439// CHECK: ret void 12440void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { 12441 vst3q_u16(a, b); 12442} 12443 12444// 
CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 12445// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 12446// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 12447// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 12448// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 12449// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* 12450// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* 12451// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12452// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 12453// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 12454// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 12455// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 12456// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 12457// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 12458// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 12459// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 12460// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 12461// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 12462// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 12463// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 12464// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 12465// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 12466// CHECK: [[TMP10:%.*]] = 
bitcast <16 x i8> [[TMP6]] to <4 x i32> 12467// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 12468// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]]) 12469// CHECK: ret void 12470void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { 12471 vst3q_u32(a, b); 12472} 12473 12474// CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 12475// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 12476// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 12477// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0 12478// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 12479// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* 12480// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* 12481// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12482// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 12483// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 12484// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 12485// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 12486// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 12487// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 12488// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 12489// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 12490// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 12491// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 
12492// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 12493// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 12494// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 12495// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 12496// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 12497// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 12498// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]]) 12499// CHECK: ret void 12500void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { 12501 vst3q_u64(a, b); 12502} 12503 12504// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 12505// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 12506// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 12507// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 12508// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 12509// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* 12510// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* 12511// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12512// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 12513// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 12514// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 12515// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 12516// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 12517// CHECK: [[TMP3:%.*]] = load <16 
x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 12518// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 12519// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 12520// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 12521// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) 12522// CHECK: ret void 12523void test_vst3q_s8(int8_t *a, int8x16x3_t b) { 12524 vst3q_s8(a, b); 12525} 12526 12527// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 12528// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 12529// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 12530// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 12531// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 12532// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* 12533// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* 12534// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12535// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12536// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 12537// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 12538// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 12539// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 12540// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 12541// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 12542// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 
x i16>* [[ARRAYIDX2]], align 16 12543// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 12544// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 12545// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 12546// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 12547// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 12548// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 12549// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 12550// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 12551// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) 12552// CHECK: ret void 12553void test_vst3q_s16(int16_t *a, int16x8x3_t b) { 12554 vst3q_s16(a, b); 12555} 12556 12557// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 12558// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 12559// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 12560// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 12561// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 12562// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* 12563// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* 12564// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12565// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 12566// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 12567// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 12568// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 
12569// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 12570// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 12571// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 12572// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 12573// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 12574// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 12575// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 12576// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 12577// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 12578// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 12579// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 12580// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 12581// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]]) 12582// CHECK: ret void 12583void test_vst3q_s32(int32_t *a, int32x4x3_t b) { 12584 vst3q_s32(a, b); 12585} 12586 12587// CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 12588// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 12589// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 12590// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0 12591// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 12592// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* 12593// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* 12594// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12595// 
CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 12596// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 12597// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 12598// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 12599// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 12600// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 12601// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 12602// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 12603// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 12604// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 12605// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 12606// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 12607// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 12608// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 12609// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 12610// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 12611// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]]) 12612// CHECK: ret void 12613void test_vst3q_s64(int64_t *a, int64x2x3_t b) { 12614 vst3q_s64(a, b); 12615} 12616 12617// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 { 12618// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 12619// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 12620// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 
0, i32 0 12621// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 12622// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* 12623// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* 12624// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12625// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 12626// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 12627// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 12628// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 12629// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 12630// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 12631// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1 12632// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 12633// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 12634// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 12635// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2 12636// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 12637// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 12638// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 12639// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 12640// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 12641// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) 12642// CHECK: ret void 12643void test_vst3q_f16(float16_t 
*a, float16x8x3_t b) { 12644 vst3q_f16(a, b); 12645} 12646 12647// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 { 12648// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 12649// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 12650// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 12651// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 12652// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* 12653// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* 12654// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12655// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 12656// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 12657// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 12658// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 12659// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 12660// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 12661// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1 12662// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 12663// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 12664// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 12665// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2 12666// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 12667// CHECK: [[TMP8:%.*]] = bitcast <4 x float> 
[[TMP7]] to <16 x i8> 12668// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 12669// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 12670// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 12671// CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]]) 12672// CHECK: ret void 12673void test_vst3q_f32(float32_t *a, float32x4x3_t b) { 12674 vst3q_f32(a, b); 12675} 12676 12677// CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 { 12678// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 12679// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 12680// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0 12681// CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 12682// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* 12683// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* 12684// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12685// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 12686// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 12687// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 12688// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 12689// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 12690// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 12691// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1 12692// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 
12693// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 12694// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 12695// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2 12696// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 12697// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 12698// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 12699// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 12700// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 12701// CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]]) 12702// CHECK: ret void 12703void test_vst3q_f64(float64_t *a, float64x2x3_t b) { 12704 vst3q_f64(a, b); 12705} 12706 12707// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 12708// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 12709// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 12710// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 12711// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 12712// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* 12713// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* 12714// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12715// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 12716// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 12717// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 12718// CHECK: [[VAL1:%.*]] = 
getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 12719// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 12720// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 12721// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 12722// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 12723// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 12724// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) 12725// CHECK: ret void 12726void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) { 12727 vst3q_p8(a, b); 12728} 12729 12730// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 12731// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 12732// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 12733// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 12734// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 12735// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 12736// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 12737// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 12738// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12739// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 12740// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 12741// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 12742// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 12743// CHECK: 
[[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 12744// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 12745// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 12746// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 12747// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 12748// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 12749// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 12750// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 12751// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 12752// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 12753// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 12754// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) 12755// CHECK: ret void 12756void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) { 12757 vst3q_p16(a, b); 12758} 12759 12760// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 12761// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 12762// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 12763// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 12764// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 12765// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 12766// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 12767// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12768// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* 
[[__S1]], i32 0, i32 0 12769// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 12770// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 12771// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 12772// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 12773// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 12774// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 12775// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 12776// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 12777// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) 12778// CHECK: ret void 12779void test_vst3_u8(uint8_t *a, uint8x8x3_t b) { 12780 vst3_u8(a, b); 12781} 12782 12783// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 12784// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 12785// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 12786// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 12787// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 12788// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 12789// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 12790// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12791// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12792// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 12793// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], 
[3 x <4 x i16>]* [[VAL]], i64 0, i64 0 12794// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 12795// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 12796// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 12797// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 12798// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 12799// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 12800// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 12801// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 12802// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 12803// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 12804// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 12805// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 12806// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 12807// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]]) 12808// CHECK: ret void 12809void test_vst3_u16(uint16_t *a, uint16x4x3_t b) { 12810 vst3_u16(a, b); 12811} 12812 12813// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 12814// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 12815// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 12816// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 12817// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 12818// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 12819// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 
12820// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12821// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 12822// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 12823// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 12824// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 12825// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 12826// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 12827// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 12828// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 12829// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 12830// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 12831// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 12832// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 12833// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 12834// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 12835// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 12836// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 12837// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]]) 12838// CHECK: ret void 12839void test_vst3_u32(uint32_t *a, uint32x2x3_t b) { 12840 vst3_u32(a, b); 12841} 12842 12843// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 12844// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 12845// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 12846// CHECK: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 12847// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 12848// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* 12849// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* 12850// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12851// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 12852// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 12853// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 12854// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 12855// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12856// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 12857// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 12858// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 12859// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12860// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 12861// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 12862// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 12863// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 12864// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12865// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12866// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 12867// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* 
[[TMP2]]) 12868// CHECK: ret void 12869void test_vst3_u64(uint64_t *a, uint64x1x3_t b) { 12870 vst3_u64(a, b); 12871} 12872 12873// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 12874// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 12875// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 12876// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 12877// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 12878// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 12879// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 12880// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12881// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 12882// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 12883// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 12884// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 12885// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 12886// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 12887// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 12888// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 12889// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 12890// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) 12891// CHECK: ret void 12892void test_vst3_s8(int8_t *a, int8x8x3_t b) { 12893 vst3_s8(a, b); 12894} 12895 12896// CHECK-LABEL: define void @test_vst3_s16(i16* 
%a, [3 x <4 x i16>] %b.coerce) #0 { 12897// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 12898// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 12899// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 12900// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 12901// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 12902// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 12903// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12904// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 12905// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 12906// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 12907// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 12908// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 12909// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 12910// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 12911// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 12912// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 12913// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 12914// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 12915// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 12916// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 12917// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 12918// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 12919// CHECK: [[TMP11:%.*]] = 
bitcast <8 x i8> [[TMP8]] to <4 x i16> 12920// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]]) 12921// CHECK: ret void 12922void test_vst3_s16(int16_t *a, int16x4x3_t b) { 12923 vst3_s16(a, b); 12924} 12925 12926// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 12927// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 12928// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 12929// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 12930// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 12931// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 12932// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 12933// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12934// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 12935// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 12936// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 12937// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 12938// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 12939// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 12940// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 12941// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 12942// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 12943// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 12944// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* 
[[VAL3]], i64 0, i64 2 12945// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 12946// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 12947// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 12948// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 12949// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 12950// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]]) 12951// CHECK: ret void 12952void test_vst3_s32(int32_t *a, int32x2x3_t b) { 12953 vst3_s32(a, b); 12954} 12955 12956// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 12957// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 12958// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 12959// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 12960// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 12961// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* 12962// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* 12963// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12964// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 12965// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 12966// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 12967// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 12968// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12969// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 12970// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 12971// CHECK: 
// NOTE(review): Auto-generated FileCheck content for the AArch64 NEON
// vst3_* / vst4q_* structured-store intrinsics. Each C function below is a
// one-line wrapper around an intrinsic; the "// CHECK:" lines must textually
// match the IR Clang emits (via the RUN line at the top of this file piped
// through opt -mem2reg). Do not hand-edit or reformat these patterns —
// regenerate them with the test-update script if codegen changes.
[[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 12972// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12973// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 12974// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 12975// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 12976// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 12977// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12978// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12979// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 12980// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]]) 12981// CHECK: ret void 12982void test_vst3_s64(int64_t *a, int64x1x3_t b) { 12983 vst3_s64(a, b); 12984} 12985 12986// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 { 12987// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 12988// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 12989// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 12990// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 12991// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 12992// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 12993// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 12994// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 12995// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 12996// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 12997// CHECK: [[TMP3:%.*]] = load 
<4 x half>, <4 x half>* [[ARRAYIDX]], align 8 12998// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 12999// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 13000// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1 13001// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 13002// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 13003// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 13004// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2 13005// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 13006// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 13007// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13008// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13009// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13010// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]]) 13011// CHECK: ret void 13012void test_vst3_f16(float16_t *a, float16x4x3_t b) { 13013 vst3_f16(a, b); 13014} 13015 13016// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 { 13017// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 13018// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 13019// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 13020// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 13021// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* 13022// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* 13023// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 13024// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 13025// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 13026// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 13027// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 13028// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 13029// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 13030// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1 13031// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 13032// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 13033// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 13034// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2 13035// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 13036// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 13037// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 13038// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 13039// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 13040// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]]) 13041// CHECK: ret void 13042void test_vst3_f32(float32_t *a, float32x2x3_t b) { 13043 vst3_f32(a, b); 13044} 13045 13046// CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 { 13047// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 13048// CHECK: [[__S1:%.*]] = alloca 
%struct.float64x1x3_t, align 8 13049// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0 13050// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 13051// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* 13052// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* 13053// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 13054// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 13055// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 13056// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 13057// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 13058// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 13059// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 13060// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1 13061// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 13062// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 13063// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 13064// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2 13065// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 13066// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 13067// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 13068// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 13069// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 13070// CHECK: 
call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]]) 13071// CHECK: ret void 13072void test_vst3_f64(float64_t *a, float64x1x3_t b) { 13073 vst3_f64(a, b); 13074} 13075 13076// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 13077// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 13078// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 13079// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 13080// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 13081// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* 13082// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* 13083// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 13084// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 13085// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 13086// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 13087// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 13088// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 13089// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 13090// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 13091// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 13092// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 13093// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) 13094// CHECK: ret void 
13095void test_vst3_p8(poly8_t *a, poly8x8x3_t b) { 13096 vst3_p8(a, b); 13097} 13098 13099// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 13100// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 13101// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 13102// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 13103// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 13104// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* 13105// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* 13106// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 13107// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13108// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 13109// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 13110// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 13111// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 13112// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 13113// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 13114// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 13115// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 13116// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 13117// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 13118// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 13119// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 13120// CHECK: 
[[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13121// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13122// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13123// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]]) 13124// CHECK: ret void 13125void test_vst3_p16(poly16_t *a, poly16x4x3_t b) { 13126 vst3_p16(a, b); 13127} 13128 13129// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 13130// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 13131// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 13132// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 13133// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 13134// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* 13135// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* 13136// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13137// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13138// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 13139// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 13140// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13141// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 13142// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 13143// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13144// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 
13145// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 13146// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13147// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 13148// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 13149// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) 13150// CHECK: ret void 13151void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) { 13152 vst4q_u8(a, b); 13153} 13154 13155// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 13156// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 13157// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 13158// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 13159// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 13160// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 13161// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 13162// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13163// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13164// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 13165// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 13166// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 13167// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 13168// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 13169// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* 
[[VAL1]], i64 0, i64 1 13170// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 13171// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 13172// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 13173// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 13174// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 13175// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 13176// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 13177// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 13178// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 13179// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 13180// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 13181// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 13182// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 13183// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 13184// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]]) 13185// CHECK: ret void 13186void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) { 13187 vst4q_u16(a, b); 13188} 13189 13190// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { 13191// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 13192// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 13193// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 13194// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 13195// CHECK: [[TMP0:%.*]] = bitcast 
%struct.uint32x4x4_t* [[__S1]] to i8* 13196// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* 13197// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13198// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 13199// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 13200// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 13201// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 13202// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 13203// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 13204// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 13205// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 13206// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 13207// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 13208// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 13209// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 13210// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 13211// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 13212// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 13213// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 13214// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 13215// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 13216// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 13217// CHECK: [[TMP13:%.*]] = bitcast <16 x 
i8> [[TMP8]] to <4 x i32> 13218// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 13219// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]]) 13220// CHECK: ret void 13221void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) { 13222 vst4q_u32(a, b); 13223} 13224 13225// CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 13226// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 13227// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 13228// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0 13229// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 13230// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* 13231// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* 13232// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13233// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 13234// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 13235// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 13236// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 13237// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 13238// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 13239// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 13240// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 13241// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 13242// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 
0, i32 0 13243// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 13244// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 13245// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 13246// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 13247// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 13248// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 13249// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 13250// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 13251// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 13252// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 13253// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 13254// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]]) 13255// CHECK: ret void 13256void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) { 13257 vst4q_u64(a, b); 13258} 13259 13260// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 13261// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 13262// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 13263// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 13264// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 13265// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* 13266// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* 13267// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13268// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, 
%struct.int8x16x4_t* [[__S1]], i32 0, i32 0 13269// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 13270// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 13271// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 13272// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 13273// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 13274// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 13275// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 13276// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 13277// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 13278// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 13279// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 13280// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) 13281// CHECK: ret void 13282void test_vst4q_s8(int8_t *a, int8x16x4_t b) { 13283 vst4q_s8(a, b); 13284} 13285 13286// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 13287// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 13288// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 13289// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 13290// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 13291// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* 13292// CHECK: [[TMP1:%.*]] = bitcast 
%struct.int16x8x4_t* [[B]] to i8* 13293// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13294// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13295// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 13296// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 13297// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 13298// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 13299// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 13300// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 13301// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 13302// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 13303// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 13304// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 13305// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 13306// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 13307// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 13308// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 13309// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 13310// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 13311// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 13312// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 13313// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 13314// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] 
to <8 x i16> 13315// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]]) 13316// CHECK: ret void 13317void test_vst4q_s16(int16_t *a, int16x8x4_t b) { 13318 vst4q_s16(a, b); 13319} 13320 13321// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { 13322// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 13323// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 13324// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 13325// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 13326// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* 13327// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* 13328// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13329// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 13330// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 13331// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 13332// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 13333// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 13334// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 13335// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 13336// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 13337// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 13338// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 13339// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x 
i32>]* [[VAL3]], i64 0, i64 2 13340// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 13341// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 13342// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 13343// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 13344// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 13345// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 13346// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 13347// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 13348// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 13349// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 13350// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]]) 13351// CHECK: ret void 13352void test_vst4q_s32(int32_t *a, int32x4x4_t b) { 13353 vst4q_s32(a, b); 13354} 13355 13356// CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 13357// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 13358// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 13359// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0 13360// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 13361// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* 13362// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* 13363// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13364// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 13365// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 13366// CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 13367// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 13368// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 13369// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 13370// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 13371// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 13372// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 13373// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 13374// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 13375// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 13376// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 13377// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 13378// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 13379// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 13380// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 13381// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 13382// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 13383// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 13384// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 13385// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]]) 13386// CHECK: ret void 13387void test_vst4q_s64(int64_t *a, int64x2x4_t b) { 13388 vst4q_s64(a, b); 13389} 13390 13391// CHECK-LABEL: define void 
@test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 { 13392// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 13393// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 13394// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 13395// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 13396// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* 13397// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* 13398// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13399// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 13400// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 13401// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 13402// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 13403// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 13404// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 13405// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1 13406// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 13407// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 13408// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 13409// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2 13410// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 13411// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 13412// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, 
%struct.float16x8x4_t* [[__S1]], i32 0, i32 0 13413// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3 13414// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 13415// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> 13416// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 13417// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 13418// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 13419// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 13420// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]]) 13421// CHECK: ret void 13422void test_vst4q_f16(float16_t *a, float16x8x4_t b) { 13423 vst4q_f16(a, b); 13424} 13425 13426// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 { 13427// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 13428// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 13429// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 13430// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 13431// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* 13432// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* 13433// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13434// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 13435// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 13436// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 13437// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 13438// CHECK: [[TMP4:%.*]] = 
bitcast <4 x float> [[TMP3]] to <16 x i8> 13439// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 13440// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1 13441// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 13442// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 13443// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 13444// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2 13445// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 13446// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 13447// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 13448// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3 13449// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 13450// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> 13451// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 13452// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 13453// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 13454// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> 13455// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]]) 13456// CHECK: ret void 13457void test_vst4q_f32(float32_t *a, float32x4x4_t b) { 13458 vst4q_f32(a, b); 13459} 13460 13461// CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 { 13462// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 13463// CHECK: 
[[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 13464// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0 13465// CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 13466// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* 13467// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* 13468// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13469// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 13470// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 13471// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 13472// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 13473// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 13474// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 13475// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1 13476// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 13477// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 13478// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 13479// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2 13480// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 13481// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 13482// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 13483// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], 
[4 x <2 x double>]* [[VAL5]], i64 0, i64 3 13484// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16 13485// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> 13486// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 13487// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 13488// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 13489// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> 13490// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]]) 13491// CHECK: ret void 13492void test_vst4q_f64(float64_t *a, float64x2x4_t b) { 13493 vst4q_f64(a, b); 13494} 13495 13496// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 13497// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 13498// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 13499// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 13500// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 13501// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* 13502// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* 13503// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13504// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 13505// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 13506// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 13507// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 13508// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x 
i8>]* [[VAL1]], i64 0, i64 1 13509// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 13510// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 13511// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 13512// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 13513// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 13514// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 13515// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 13516// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) 13517// CHECK: ret void 13518void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) { 13519 vst4q_p8(a, b); 13520} 13521 13522// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 13523// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 13524// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 13525// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 13526// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 13527// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* 13528// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* 13529// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13530// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13531// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 13532// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 13533// CHECK: 
[[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 13534// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 13535// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 13536// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 13537// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 13538// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 13539// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 13540// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 13541// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 13542// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 13543// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 13544// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 13545// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 13546// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 13547// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 13548// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 13549// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 13550// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 13551// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]]) 13552// CHECK: ret void 13553void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) { 13554 vst4q_p16(a, b); 13555} 13556 13557// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 13558// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 
8 13559// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 13560// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 13561// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 13562// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* 13563// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* 13564// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13565// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 13566// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 13567// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 13568// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 13569// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 13570// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 13571// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 13572// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 13573// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 13574// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 13575// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 13576// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 13577// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 13578// CHECK: ret void 13579void test_vst4_u8(uint8_t *a, uint8x8x4_t b) { 13580 vst4_u8(a, 
b); 13581} 13582 13583// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 13584// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 13585// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 13586// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 13587// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 13588// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* 13589// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* 13590// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13591// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13592// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 13593// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 13594// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 13595// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 13596// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 13597// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 13598// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 13599// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 13600// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 13601// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 13602// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 13603// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 13604// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, 
%struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 13605// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 13606// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 13607// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 13608// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13609// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13610// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13611// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 13612// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) 13613// CHECK: ret void 13614void test_vst4_u16(uint16_t *a, uint16x4x4_t b) { 13615 vst4_u16(a, b); 13616} 13617 13618// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 13619// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 13620// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 13621// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 13622// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 13623// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* 13624// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* 13625// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13626// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 13627// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 13628// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 13629// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 13630// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 13631// 
CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 13632// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 13633// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 13634// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 13635// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 13636// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 13637// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 13638// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 13639// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 13640// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 13641// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 13642// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 13643// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 13644// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 13645// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 13646// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 13647// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]]) 13648// CHECK: ret void 13649void test_vst4_u32(uint32_t *a, uint32x2x4_t b) { 13650 vst4_u32(a, b); 13651} 13652 13653// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 13654// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 13655// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 13656// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, 
%struct.uint64x1x4_t* [[B]], i32 0, i32 0 13657// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 13658// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* 13659// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* 13660// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13661// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 13662// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 13663// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 13664// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 13665// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 13666// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 13667// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 13668// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 13669// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 13670// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 13671// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 13672// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 13673// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 13674// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 13675// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 13676// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 13677// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 13678// CHECK: 
[[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 13679// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 13680// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 13681// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 13682// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]]) 13683// CHECK: ret void 13684void test_vst4_u64(uint64_t *a, uint64x1x4_t b) { 13685 vst4_u64(a, b); 13686} 13687 13688// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 13689// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 13690// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 13691// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 13692// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 13693// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* 13694// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* 13695// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13696// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 13697// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 13698// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 13699// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 13700// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 13701// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 13702// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 13703// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x 
<8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 13704// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 13705// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 13706// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 13707// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 13708// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 13709// CHECK: ret void 13710void test_vst4_s8(int8_t *a, int8x8x4_t b) { 13711 vst4_s8(a, b); 13712} 13713 13714// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 13715// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 13716// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 13717// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 13718// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 13719// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* 13720// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* 13721// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13722// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13723// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 13724// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 13725// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 13726// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 13727// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 13728// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x 
<4 x i16>]* [[VAL1]], i64 0, i64 1 13729// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 13730// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 13731// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 13732// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 13733// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 13734// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 13735// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 13736// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 13737// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 13738// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 13739// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13740// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13741// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13742// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 13743// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) 13744// CHECK: ret void 13745void test_vst4_s16(int16_t *a, int16x4x4_t b) { 13746 vst4_s16(a, b); 13747} 13748 13749// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 13750// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 13751// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 13752// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 13753// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 13754// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] 
to i8* 13755// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 13756// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13757// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 13758// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 13759// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 13760// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 13761// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 13762// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 13763// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 13764// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 13765// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 13766// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 13767// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 13768// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 13769// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 13770// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 13771// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 13772// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 13773// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 13774// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 13775// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 13776// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 13777// CHECK: 
[[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 13778// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]]) 13779// CHECK: ret void 13780void test_vst4_s32(int32_t *a, int32x2x4_t b) { 13781 vst4_s32(a, b); 13782} 13783 13784// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 13785// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 13786// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 13787// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 13788// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 13789// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* 13790// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* 13791// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13792// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 13793// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 13794// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 13795// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 13796// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 13797// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 13798// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 13799// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 13800// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 13801// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 13802// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds 
[4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 13803// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 13804// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 13805// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 13806// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 13807// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 13808// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 13809// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 13810// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 13811// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 13812// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 13813// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]]) 13814// CHECK: ret void 13815void test_vst4_s64(int64_t *a, int64x1x4_t b) { 13816 vst4_s64(a, b); 13817} 13818 13819// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 { 13820// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 13821// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 13822// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 13823// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 13824// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 13825// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 13826// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13827// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 13828// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* 
[[__S1]], i32 0, i32 0 13829// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 13830// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 13831// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 13832// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 13833// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1 13834// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 13835// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 13836// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 13837// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2 13838// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 13839// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 13840// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 13841// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3 13842// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 13843// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> 13844// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13845// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13846// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13847// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 13848// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) 13849// CHECK: ret void 13850void test_vst4_f16(float16_t *a, float16x4x4_t b) { 13851 vst4_f16(a, 
b); 13852} 13853 13854// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 { 13855// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 13856// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 13857// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 13858// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 13859// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 13860// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 13861// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13862// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 13863// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 13864// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 13865// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 13866// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 13867// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 13868// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 13869// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 13870// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 13871// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 13872// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 13873// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 13874// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 13875// CHECK: [[VAL5:%.*]] = 
getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 13876// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 13877// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 13878// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> 13879// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 13880// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 13881// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 13882// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> 13883// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]]) 13884// CHECK: ret void 13885void test_vst4_f32(float32_t *a, float32x2x4_t b) { 13886 vst4_f32(a, b); 13887} 13888 13889// CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 { 13890// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 13891// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 13892// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 13893// CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 13894// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* 13895// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* 13896// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13897// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 13898// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13899// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 13900// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x 
double>* [[ARRAYIDX]], align 8 13901// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 13902// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13903// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 13904// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 13905// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 13906// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13907// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 13908// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 13909// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 13910// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13911// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 13912// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 13913// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> 13914// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 13915// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 13916// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 13917// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> 13918// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]]) 13919// CHECK: ret void 13920void test_vst4_f64(float64_t *a, float64x1x4_t b) { 13921 vst4_f64(a, b); 13922} 13923 13924// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 13925// CHECK: [[B:%.*]] = 
alloca %struct.poly8x8x4_t, align 8 13926// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 13927// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 13928// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 13929// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 13930// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 13931// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13932// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13933// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 13934// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 13935// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13936// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 13937// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 13938// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13939// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 13940// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 13941// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13942// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 13943// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 13944// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 13945// CHECK: ret void 13946void test_vst4_p8(poly8_t *a, 
poly8x8x4_t b) { 13947 vst4_p8(a, b); 13948} 13949 13950// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 13951// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 13952// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 13953// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 13954// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 13955// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 13956// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 13957// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13958// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13959// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13960// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 13961// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 13962// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 13963// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13964// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 13965// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 13966// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 13967// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13968// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 13969// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 13970// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 13971// CHECK: [[VAL5:%.*]] = getelementptr inbounds 
%struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13972// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 13973// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 13974// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 13975// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13976// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13977// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13978// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 13979// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) 13980// CHECK: ret void 13981void test_vst4_p16(poly16_t *a, poly16x4x4_t b) { 13982 vst4_p16(a, b); 13983} 13984 13985// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 { 13986// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 13987// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 13988// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 13989// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a) 13990// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* 13991// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] 13992// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* 13993// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 13994// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) 13995// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 13996// CHECK: ret %struct.uint8x16x2_t [[TMP4]] 13997uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) { 13998 return vld1q_u8_x2(a); 13999} 14000 14001// CHECK-LABEL: define 
%struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 { 14002// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 14003// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 14004// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 14005// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14006// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14007// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]]) 14008// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 14009// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] 14010// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* 14011// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 14012// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14013// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 14014// CHECK: ret %struct.uint16x8x2_t [[TMP6]] 14015uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { 14016 return vld1q_u16_x2(a); 14017} 14018 14019// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 { 14020// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 14021// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 14022// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 14023// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 14024// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 14025// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]]) 14026// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* 14027// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]] 14028// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* 14029// CHECK: [[TMP5:%.*]] = bitcast 
%struct.uint32x4x2_t* [[__RET]] to i8* 14030// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14031// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 14032// CHECK: ret %struct.uint32x4x2_t [[TMP6]] 14033uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) { 14034 return vld1q_u32_x2(a); 14035} 14036 14037// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 { 14038// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 14039// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 14040// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* 14041// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14042// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14043// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]]) 14044// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* 14045// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] 14046// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* 14047// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* 14048// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14049// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 14050// CHECK: ret %struct.uint64x2x2_t [[TMP6]] 14051uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) { 14052 return vld1q_u64_x2(a); 14053} 14054 14055// CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 { 14056// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 14057// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 14058// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* 14059// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a) 
14060// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* 14061// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] 14062// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* 14063// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* 14064// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) 14065// CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 14066// CHECK: ret %struct.int8x16x2_t [[TMP4]] 14067int8x16x2_t test_vld1q_s8_x2(int8_t const *a) { 14068 return vld1q_s8_x2(a); 14069} 14070 14071// CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 { 14072// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 14073// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 14074// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 14075// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14076// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14077// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]]) 14078// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 14079// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] 14080// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* 14081// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 14082// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14083// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 14084// CHECK: ret %struct.int16x8x2_t [[TMP6]] 14085int16x8x2_t test_vld1q_s16_x2(int16_t const *a) { 14086 return vld1q_s16_x2(a); 14087} 14088 14089// CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 { 14090// CHECK: [[RETVAL:%.*]] = alloca 
%struct.int32x4x2_t, align 16 14091// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 14092// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 14093// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 14094// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 14095// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]]) 14096// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* 14097// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]] 14098// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* 14099// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 14100// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14101// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 14102// CHECK: ret %struct.int32x4x2_t [[TMP6]] 14103int32x4x2_t test_vld1q_s32_x2(int32_t const *a) { 14104 return vld1q_s32_x2(a); 14105} 14106 14107// CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 { 14108// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 14109// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 14110// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* 14111// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14112// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14113// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]]) 14114// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* 14115// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]] 14116// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* 14117// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* 14118// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* 
[[TMP5]], i64 32, i32 16, i1 false) 14119// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 14120// CHECK: ret %struct.int64x2x2_t [[TMP6]] 14121int64x2x2_t test_vld1q_s64_x2(int64_t const *a) { 14122 return vld1q_s64_x2(a); 14123} 14124 14125// CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 { 14126// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 14127// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 14128// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 14129// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 14130// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14131// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]]) 14132// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 14133// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] 14134// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* 14135// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 14136// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14137// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 14138// CHECK: ret %struct.float16x8x2_t [[TMP6]] 14139float16x8x2_t test_vld1q_f16_x2(float16_t const *a) { 14140 return vld1q_f16_x2(a); 14141} 14142 14143// CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 { 14144// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 14145// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 14146// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 14147// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 14148// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 14149// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } 
// CHECK: @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* [[TMP2]])
// NOTE(review): this appears to be an auto-generated clang CodeGen test for the
// AArch64 NEON vld1*_x2 / vld1*_x3 intrinsics.  The "// CHECK" lines are
// FileCheck expectations that must match clang's exact IR output (value names,
// temp numbering, memcpy size/alignment) -- do not edit them by hand.
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
  return vld1q_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x2_t [[TMP4]]
poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
  return vld1q_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
  return vld1q_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}

// ---- vld1_*_x2: 64-bit (D-register) two-vector loads; 16-byte structs, align 8 ----

// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x2_t [[TMP4]]
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
  return vld1_u8_x2(a);
}

// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
  return vld1_u16_x2(a);
}

// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
  return vld1_u32_x2(a);
}

// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
  return vld1_u64_x2(a);
}

// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}

// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}

// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}

// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}

// NOTE(review): f16 has no IR vector type here, so half data is loaded as <4 x i16>.
// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}

// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}

// ---- vld1q_*_x3: 128-bit (Q-register) three-vector loads; 48-byte structs, align 16 ----

// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x3_t [[TMP4]]
uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
  return vld1q_u8_x3(a);
}

// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
  return vld1q_u16_x3(a);
}

// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
  return vld1q_u32_x3(a);
}

// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
  return vld1q_u64_x3(a);
}

// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x3_t [[TMP4]]
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
  return vld1q_s8_x3(a);
}

// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
  return vld1q_s16_x3(a);
}

// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
  return vld1q_s32_x3(a);
}

// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
  return vld1q_s64_x3(a);
}

// CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
  return vld1q_f16_x3(a);
}

// CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
  return vld1q_f32_x3(a);
}

// CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}

// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x3_t [[TMP4]]
poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
  return vld1q_p8_x3(a);
}

// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
  return
vld1q_p16_x3(a); 14703} 14704 14705// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 { 14706// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 14707// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 14708// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* 14709// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14710// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14711// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]]) 14712// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 14713// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 14714// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* 14715// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* 14716// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 14717// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 14718// CHECK: ret %struct.poly64x2x3_t [[TMP6]] 14719poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) { 14720 return vld1q_p64_x3(a); 14721} 14722 14723// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 { 14724// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 14725// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 14726// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 14727// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a) 14728// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 14729// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 14730// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* 14731// CHECK: [[TMP3:%.*]] = bitcast 
%struct.uint8x8x3_t* [[__RET]] to i8* 14732// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 14733// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 14734// CHECK: ret %struct.uint8x8x3_t [[TMP4]] 14735uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) { 14736 return vld1_u8_x3(a); 14737} 14738 14739// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 { 14740// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 14741// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 14742// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 14743// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14744// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14745// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14746// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14747// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14748// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* 14749// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 14750// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14751// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 14752// CHECK: ret %struct.uint16x4x3_t [[TMP6]] 14753uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { 14754 return vld1_u16_x3(a); 14755} 14756 14757// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 { 14758// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 14759// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 14760// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 14761// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 14762// CHECK: 
[[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 14763// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]]) 14764// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 14765// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 14766// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* 14767// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 14768// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14769// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 14770// CHECK: ret %struct.uint32x2x3_t [[TMP6]] 14771uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { 14772 return vld1_u32_x3(a); 14773} 14774 14775// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 { 14776// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 14777// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 14778// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 14779// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14780// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14781// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]]) 14782// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 14783// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 14784// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* 14785// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 14786// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14787// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 14788// CHECK: ret 
%struct.uint64x1x3_t [[TMP6]] 14789uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { 14790 return vld1_u64_x3(a); 14791} 14792 14793// CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 { 14794// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 14795// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 14796// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 14797// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a) 14798// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 14799// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 14800// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* 14801// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 14802// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 14803// CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 14804// CHECK: ret %struct.int8x8x3_t [[TMP4]] 14805int8x8x3_t test_vld1_s8_x3(int8_t const *a) { 14806 return vld1_s8_x3(a); 14807} 14808 14809// CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 { 14810// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 14811// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 14812// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 14813// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14814// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14815// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14816// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14817// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14818// CHECK: [[TMP4:%.*]] = bitcast 
%struct.int16x4x3_t* [[RETVAL]] to i8* 14819// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 14820// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14821// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 14822// CHECK: ret %struct.int16x4x3_t [[TMP6]] 14823int16x4x3_t test_vld1_s16_x3(int16_t const *a) { 14824 return vld1_s16_x3(a); 14825} 14826 14827// CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 { 14828// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 14829// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 14830// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 14831// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 14832// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 14833// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]]) 14834// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 14835// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 14836// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* 14837// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 14838// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14839// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 14840// CHECK: ret %struct.int32x2x3_t [[TMP6]] 14841int32x2x3_t test_vld1_s32_x3(int32_t const *a) { 14842 return vld1_s32_x3(a); 14843} 14844 14845// CHECK-LABEL: define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 { 14846// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 14847// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 14848// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 14849// CHECK: 
[[TMP1:%.*]] = bitcast i64* %a to i8* 14850// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14851// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]]) 14852// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 14853// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 14854// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* 14855// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 14856// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14857// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 14858// CHECK: ret %struct.int64x1x3_t [[TMP6]] 14859int64x1x3_t test_vld1_s64_x3(int64_t const *a) { 14860 return vld1_s64_x3(a); 14861} 14862 14863// CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 { 14864// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 14865// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 14866// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 14867// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 14868// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14869// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14870// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14871// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14872// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* 14873// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 14874// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14875// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, 
%struct.float16x4x3_t* [[RETVAL]], align 8 14876// CHECK: ret %struct.float16x4x3_t [[TMP6]] 14877float16x4x3_t test_vld1_f16_x3(float16_t const *a) { 14878 return vld1_f16_x3(a); 14879} 14880 14881// CHECK-LABEL: define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) #0 { 14882// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 14883// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 14884// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 14885// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 14886// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 14887// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* [[TMP2]]) 14888// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* 14889// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 14890// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* 14891// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 14892// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14893// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 14894// CHECK: ret %struct.float32x2x3_t [[TMP6]] 14895float32x2x3_t test_vld1_f32_x3(float32_t const *a) { 14896 return vld1_f32_x3(a); 14897} 14898 14899// CHECK-LABEL: define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) #0 { 14900// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 14901// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 14902// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 14903// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 14904// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 14905// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } 
@llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]]) 14906// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }* 14907// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 14908// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* 14909// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 14910// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14911// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 14912// CHECK: ret %struct.float64x1x3_t [[TMP6]] 14913float64x1x3_t test_vld1_f64_x3(float64_t const *a) { 14914 return vld1_f64_x3(a); 14915} 14916 14917// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld1_p8_x3(i8* %a) #0 { 14918// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 14919// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 14920// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 14921// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a) 14922// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 14923// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 14924// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* 14925// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 14926// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 14927// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 14928// CHECK: ret %struct.poly8x8x3_t [[TMP4]] 14929poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { 14930 return vld1_p8_x3(a); 14931} 14932 14933// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld1_p16_x3(i16* %a) #0 { 14934// CHECK: 
[[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 14935// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 14936// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 14937// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14938// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14939// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14940// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14941// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14942// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* 14943// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 14944// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14945// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 14946// CHECK: ret %struct.poly16x4x3_t [[TMP6]] 14947poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { 14948 return vld1_p16_x3(a); 14949} 14950 14951// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld1_p64_x3(i64* %a) #0 { 14952// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 14953// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 14954// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 14955// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14956// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14957// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]]) 14958// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 14959// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 14960// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* 14961// CHECK: [[TMP5:%.*]] = 
bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 14962// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14963// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 14964// CHECK: ret %struct.poly64x1x3_t [[TMP6]] 14965poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { 14966 return vld1_p64_x3(a); 14967} 14968 14969// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) #0 { 14970// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 14971// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 14972// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 14973// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a) 14974// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 14975// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 14976// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* 14977// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 14978// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 14979// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 14980// CHECK: ret %struct.uint8x16x4_t [[TMP4]] 14981uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { 14982 return vld1q_u8_x4(a); 14983} 14984 14985// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) #0 { 14986// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 14987// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 14988// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 14989// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14990// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14991// CHECK: 
[[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 14992// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 14993// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 14994// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* 14995// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 14996// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 14997// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 14998// CHECK: ret %struct.uint16x8x4_t [[TMP6]] 14999uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { 15000 return vld1q_u16_x4(a); 15001} 15002 15003// CHECK-LABEL: define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) #0 { 15004// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 15005// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 15006// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 15007// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15008// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15009// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]]) 15010// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 15011// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 15012// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* 15013// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 15014// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15015// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* 
[[RETVAL]], align 16 15016// CHECK: ret %struct.uint32x4x4_t [[TMP6]] 15017uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { 15018 return vld1q_u32_x4(a); 15019} 15020 15021// CHECK-LABEL: define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) #0 { 15022// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 15023// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 15024// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 15025// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15026// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15027// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]]) 15028// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 15029// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 15030// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* 15031// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 15032// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15033// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 15034// CHECK: ret %struct.uint64x2x4_t [[TMP6]] 15035uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { 15036 return vld1q_u64_x4(a); 15037} 15038 15039// CHECK-LABEL: define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) #0 { 15040// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 15041// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 15042// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 15043// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a) 15044// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 15045// 
CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 15046// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* 15047// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 15048// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 15049// CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 15050// CHECK: ret %struct.int8x16x4_t [[TMP4]] 15051int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { 15052 return vld1q_s8_x4(a); 15053} 15054 15055// CHECK-LABEL: define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) #0 { 15056// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 15057// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 15058// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 15059// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15060// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15061// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 15062// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 15063// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 15064// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* 15065// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 15066// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15067// CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 15068// CHECK: ret %struct.int16x8x4_t [[TMP6]] 15069int16x8x4_t test_vld1q_s16_x4(int16_t const *a) { 15070 return vld1q_s16_x4(a); 15071} 15072 15073// CHECK-LABEL: define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) #0 { 
// NOTE(review): auto-generated clang/FileCheck regression test for AArch64 NEON
// vld1*_x4 intrinsics. The "// CHECK" comments below are FileCheck directives
// (load-bearing test expectations), not documentation — do not edit them by hand;
// regenerate them with the test-update script if clang's emitted IR changes.
15074// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 15075// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 15076// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 15077// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15078// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15079// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]]) 15080// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 15081// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 15082// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* 15083// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 15084// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15085// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 15086// CHECK: ret %struct.int32x4x4_t [[TMP6]] 15087int32x4x4_t test_vld1q_s32_x4(int32_t const *a) { 15088 return vld1q_s32_x4(a); 15089} 15090 15091// CHECK-LABEL: define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) #0 { 15092// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 15093// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 15094// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 15095// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15096// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15097// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]]) 15098// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 15099// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 15100// CHECK: 
[[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* 15101// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 15102// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15103// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 15104// CHECK: ret %struct.int64x2x4_t [[TMP6]] 15105int64x2x4_t test_vld1q_s64_x4(int64_t const *a) { 15106 return vld1q_s64_x4(a); 15107} 15108 15109// CHECK-LABEL: define %struct.float16x8x4_t @test_vld1q_f16_x4(half* %a) #0 { 15110// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 15111// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 15112// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 15113// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 15114// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15115// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 15116// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 15117// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 15118// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* 15119// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 15120// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15121// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 15122// CHECK: ret %struct.float16x8x4_t [[TMP6]] 15123float16x8x4_t test_vld1q_f16_x4(float16_t const *a) { 15124 return vld1q_f16_x4(a); 15125} 15126 15127// CHECK-LABEL: define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) #0 { 15128// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 15129// CHECK: [[__RET:%.*]] = alloca 
%struct.float32x4x4_t, align 16 15130// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 15131// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 15132// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 15133// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* [[TMP2]]) 15134// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 15135// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 15136// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* 15137// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 15138// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15139// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 15140// CHECK: ret %struct.float32x4x4_t [[TMP6]] 15141float32x4x4_t test_vld1q_f32_x4(float32_t const *a) { 15142 return vld1q_f32_x4(a); 15143} 15144 15145// CHECK-LABEL: define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) #0 { 15146// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 15147// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 15148// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 15149// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 15150// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 15151// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]]) 15152// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 15153// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 
[[TMP3]] 15154// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* 15155// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 15156// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15157// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 15158// CHECK: ret %struct.float64x2x4_t [[TMP6]] 15159float64x2x4_t test_vld1q_f64_x4(float64_t const *a) { 15160 return vld1q_f64_x4(a); 15161} 15162 15163// CHECK-LABEL: define %struct.poly8x16x4_t @test_vld1q_p8_x4(i8* %a) #0 { 15164// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 15165// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 15166// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 15167// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a) 15168// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 15169// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 15170// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* 15171// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 15172// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 15173// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 15174// CHECK: ret %struct.poly8x16x4_t [[TMP4]] 15175poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) { 15176 return vld1q_p8_x4(a); 15177} 15178 15179// CHECK-LABEL: define %struct.poly16x8x4_t @test_vld1q_p16_x4(i16* %a) #0 { 15180// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 15181// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 15182// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 
15183// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15184// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15185// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 15186// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 15187// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 15188// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* 15189// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 15190// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15191// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 15192// CHECK: ret %struct.poly16x8x4_t [[TMP6]] 15193poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) { 15194 return vld1q_p16_x4(a); 15195} 15196 15197// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld1q_p64_x4(i64* %a) #0 { 15198// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 15199// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 15200// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 15201// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15202// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15203// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]]) 15204// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 15205// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 15206// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* 15207// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 15208// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
[[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15209// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 15210// CHECK: ret %struct.poly64x2x4_t [[TMP6]] 15211poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) { 15212 return vld1q_p64_x4(a); 15213} 15214 15215// CHECK-LABEL: define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) #0 { 15216// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 15217// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 15218// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 15219// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a) 15220// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 15221// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 15222// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* 15223// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 15224// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 15225// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 15226// CHECK: ret %struct.uint8x8x4_t [[TMP4]] 15227uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) { 15228 return vld1_u8_x4(a); 15229} 15230 15231// CHECK-LABEL: define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) #0 { 15232// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 15233// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 15234// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 15235// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15236// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15237// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15238// CHECK: 
[[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15239// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15240// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* 15241// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 15242// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15243// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 15244// CHECK: ret %struct.uint16x4x4_t [[TMP6]] 15245uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) { 15246 return vld1_u16_x4(a); 15247} 15248 15249// CHECK-LABEL: define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) #0 { 15250// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 15251// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 15252// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 15253// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15254// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15255// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]]) 15256// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 15257// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 15258// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* 15259// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 15260// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15261// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 15262// CHECK: ret %struct.uint32x2x4_t [[TMP6]] 15263uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) { 15264 return 
vld1_u32_x4(a); 15265} 15266 15267// CHECK-LABEL: define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) #0 { 15268// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 15269// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 15270// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 15271// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15272// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15273// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]]) 15274// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 15275// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 15276// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* 15277// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 15278// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15279// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 15280// CHECK: ret %struct.uint64x1x4_t [[TMP6]] 15281uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) { 15282 return vld1_u64_x4(a); 15283} 15284 15285// CHECK-LABEL: define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) #0 { 15286// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 15287// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 15288// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 15289// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a) 15290// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 15291// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 15292// CHECK: [[TMP2:%.*]] = bitcast 
%struct.int8x8x4_t* [[RETVAL]] to i8* 15293// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 15294// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 15295// CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 15296// CHECK: ret %struct.int8x8x4_t [[TMP4]] 15297int8x8x4_t test_vld1_s8_x4(int8_t const *a) { 15298 return vld1_s8_x4(a); 15299} 15300 15301// CHECK-LABEL: define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) #0 { 15302// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 15303// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 15304// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 15305// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15306// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15307// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15308// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15309// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15310// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* 15311// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 15312// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15313// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 15314// CHECK: ret %struct.int16x4x4_t [[TMP6]] 15315int16x4x4_t test_vld1_s16_x4(int16_t const *a) { 15316 return vld1_s16_x4(a); 15317} 15318 15319// CHECK-LABEL: define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) #0 { 15320// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 15321// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 15322// CHECK: [[TMP0:%.*]] = bitcast 
%struct.int32x2x4_t* [[__RET]] to i8* 15323// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15324// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15325// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]]) 15326// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 15327// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 15328// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* 15329// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 15330// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15331// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 15332// CHECK: ret %struct.int32x2x4_t [[TMP6]] 15333int32x2x4_t test_vld1_s32_x4(int32_t const *a) { 15334 return vld1_s32_x4(a); 15335} 15336 15337// CHECK-LABEL: define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) #0 { 15338// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 15339// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 15340// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 15341// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15342// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15343// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]]) 15344// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 15345// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 15346// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* 15347// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 15348// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15349// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 15350// CHECK: ret %struct.int64x1x4_t [[TMP6]] 15351int64x1x4_t test_vld1_s64_x4(int64_t const *a) { 15352 return vld1_s64_x4(a); 15353} 15354 15355// CHECK-LABEL: define %struct.float16x4x4_t @test_vld1_f16_x4(half* %a) #0 { 15356// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 15357// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 15358// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 15359// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 15360// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15361// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15362// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15363// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15364// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* 15365// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 15366// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15367// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 15368// CHECK: ret %struct.float16x4x4_t [[TMP6]] 15369float16x4x4_t test_vld1_f16_x4(float16_t const *a) { 15370 return vld1_f16_x4(a); 15371} 15372 15373// CHECK-LABEL: define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) #0 { 15374// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 15375// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 15376// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 15377// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 15378// CHECK: 
[[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 15379// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* [[TMP2]]) 15380// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 15381// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 15382// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* 15383// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 15384// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15385// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 15386// CHECK: ret %struct.float32x2x4_t [[TMP6]] 15387float32x2x4_t test_vld1_f32_x4(float32_t const *a) { 15388 return vld1_f32_x4(a); 15389} 15390 15391// CHECK-LABEL: define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) #0 { 15392// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 15393// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 15394// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 15395// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 15396// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 15397// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]]) 15398// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* 15399// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 15400// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* 15401// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 15402// CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15403// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 15404// CHECK: ret %struct.float64x1x4_t [[TMP6]] 15405float64x1x4_t test_vld1_f64_x4(float64_t const *a) { 15406 return vld1_f64_x4(a); 15407} 15408 15409// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld1_p8_x4(i8* %a) #0 { 15410// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 15411// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 15412// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 15413// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a) 15414// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 15415// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 15416// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* 15417// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 15418// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 15419// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 15420// CHECK: ret %struct.poly8x8x4_t [[TMP4]] 15421poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { 15422 return vld1_p8_x4(a); 15423} 15424 15425// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld1_p16_x4(i16* %a) #0 { 15426// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 15427// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 15428// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 15429// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15430// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15431// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* 
[[TMP2]]) 15432// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15433// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15434// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* 15435// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 15436// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15437// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 15438// CHECK: ret %struct.poly16x4x4_t [[TMP6]] 15439poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { 15440 return vld1_p16_x4(a); 15441} 15442 15443// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld1_p64_x4(i64* %a) #0 { 15444// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 15445// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 15446// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 15447// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15448// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15449// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]]) 15450// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 15451// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 15452// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* 15453// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 15454// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15455// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 15456// CHECK: ret %struct.poly64x1x4_t [[TMP6]] 15457poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) 
// NOTE(review): generated test section — vst1q_*_x2 (two-register store)
// intrinsics begin below; each wrapper's CHECK lines pin the exact pre-opaque-
// pointer IR clang emits (struct-coerced [2 x <N x T>] argument, memcpy into a
// local, then @llvm.aarch64.neon.st1x2.*). Regenerate rather than hand-edit.
{ 15458 return vld1_p64_x4(a); 15459} 15460 15461// CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 15462// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 15463// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 15464// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 15465// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 15466// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* 15467// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* 15468// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15469// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 15470// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 15471// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 15472// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 15473// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 15474// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 15475// CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) 15476// CHECK: ret void 15477void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { 15478 vst1q_u8_x2(a, b); 15479} 15480 15481// CHECK-LABEL: define void @test_vst1q_u16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 15482// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 15483// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 15484// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 15485// CHECK: store [2 x <8 x i16>] [[B]].coerce, 
[2 x <8 x i16>]* [[COERCE_DIVE]], align 16 15486// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 15487// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 15488// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15489// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 15490// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 15491// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 15492// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 15493// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 15494// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 15495// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 15496// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 15497// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 15498// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 15499// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 15500// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* 15501// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) 15502// CHECK: ret void 15503void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { 15504 vst1q_u16_x2(a, b); 15505} 15506 15507// CHECK-LABEL: define void @test_vst1q_u32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 15508// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 15509// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 15510// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 15511// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 
16 15512// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 15513// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 15514// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15515// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 15516// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 15517// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 15518// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 15519// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 15520// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 15521// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 15522// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 15523// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 15524// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 15525// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 15526// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* 15527// CHECK: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]]) 15528// CHECK: ret void 15529void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) { 15530 vst1q_u32_x2(a, b); 15531} 15532 15533// CHECK-LABEL: define void @test_vst1q_u64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 15534// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 15535// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 15536// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 15537// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 15538// CHECK: [[TMP0:%.*]] = bitcast 
%struct.uint64x2x2_t* [[__S1]] to i8* 15539// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* 15540// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15541// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 15542// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 15543// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 15544// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 15545// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 15546// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 15547// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 15548// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 15549// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 15550// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 15551// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 15552// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* 15553// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]]) 15554// CHECK: ret void 15555void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) { 15556 vst1q_u64_x2(a, b); 15557} 15558 15559// CHECK-LABEL: define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 15560// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 15561// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 15562// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 15563// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 15564// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* 15565// 
CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* 15566// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15567// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 15568// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 15569// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 15570// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 15571// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 15572// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 15573// CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) 15574// CHECK: ret void 15575void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { 15576 vst1q_s8_x2(a, b); 15577} 15578 15579// CHECK-LABEL: define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 15580// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 15581// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 15582// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 15583// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 15584// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 15585// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 15586// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15587// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 15588// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 15589// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 
15590// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 15591// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 15592// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 15593// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 15594// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 15595// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 15596// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 15597// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 15598// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* 15599// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) 15600// CHECK: ret void 15601void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) { 15602 vst1q_s16_x2(a, b); 15603} 15604 15605// CHECK-LABEL: define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 15606// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 15607// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 15608// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 15609// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 15610// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 15611// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 15612// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15613// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 15614// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 15615// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 15616// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x 
i32>* [[ARRAYIDX]], align 16 15617// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 15618// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 15619// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 15620// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 15621// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 15622// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 15623// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 15624// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* 15625// CHECK: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]]) 15626// CHECK: ret void 15627void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) { 15628 vst1q_s32_x2(a, b); 15629} 15630 15631// CHECK-LABEL: define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 15632// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 15633// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 15634// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 15635// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 15636// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* 15637// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* 15638// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15639// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 15640// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 15641// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 15642// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 15643// CHECK: 
// NOTE(review): generated CHECK expectations for the q-form vst1q_*_x2 float/poly variants.
// f16 lanes are loaded as <8 x half> but bitcast through <16 x i8> to <8 x i16> and stored via
// @llvm.aarch64.neon.st1x2.v8i16 — that i16 lowering is what the CHECK lines below require;
// keep code and CHECK text in sync via regeneration, not manual edits.
[[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 15644// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 15645// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 15646// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 15647// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 15648// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 15649// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 15650// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* 15651// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]]) 15652// CHECK: ret void 15653void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) { 15654 vst1q_s64_x2(a, b); 15655} 15656 15657// CHECK-LABEL: define void @test_vst1q_f16_x2(half* %a, [2 x <8 x half>] %b.coerce) #0 { 15658// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 15659// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 15660// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 15661// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 15662// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 15663// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 15664// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15665// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 15666// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 15667// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 15668// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 15669// CHECK: [[TMP4:%.*]] = bitcast <8 x
half> [[TMP3]] to <16 x i8> 15670// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 15671// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 15672// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 15673// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 15674// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 15675// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 15676// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* 15677// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) 15678// CHECK: ret void 15679void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) { 15680 vst1q_f16_x2(a, b); 15681} 15682 15683// CHECK-LABEL: define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b.coerce) #0 { 15684// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 15685// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 15686// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 15687// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 15688// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 15689// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 15690// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15691// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 15692// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 15693// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 15694// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 15695// CHECK: [[TMP4:%.*]] = bitcast <4 x
float> [[TMP3]] to <16 x i8> 15696// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 15697// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 15698// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 15699// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 15700// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 15701// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 15702// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float* 15703// CHECK: call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], float* [[TMP9]]) 15704// CHECK: ret void 15705void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) { 15706 vst1q_f32_x2(a, b); 15707} 15708 15709// CHECK-LABEL: define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b.coerce) #0 { 15710// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 15711// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 15712// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 15713// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 15714// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* 15715// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* 15716// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15717// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 15718// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 15719// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 15720// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 15721// CHECK:
// NOTE(review): generated CHECK expectations continue: poly q-forms, then the first 64-bit
// d-register (non-q) variants. Note the d-forms use align 8, 16-byte memcpy, and <8 x i8>
// intermediate bitcasts, versus align 16 / 32-byte / <16 x i8> for the q-forms above —
// this asymmetry is intentional per the generated IR; do not "normalize" it by hand.
%struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 15747// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 15748// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 15749// CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) 15750// CHECK: ret void 15751void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { 15752 vst1q_p8_x2(a, b); 15753} 15754 15755// CHECK-LABEL: define void @test_vst1q_p16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 15756// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 15757// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 15758// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 15759// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 15760// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 15761// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 15762// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15763// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 15764// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 15765// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 15766// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 15767// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 15768// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 15769// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 15770// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 15771// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]]
to <16 x i8> 15772// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 15773// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 15774// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* 15775// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) 15776// CHECK: ret void 15777void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) { 15778 vst1q_p16_x2(a, b); 15779} 15780 15781// CHECK-LABEL: define void @test_vst1q_p64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 15782// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 15783// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 15784// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0 15785// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 15786// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* 15787// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* 15788// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15789// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 15790// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 15791// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 15792// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 15793// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 15794// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 15795// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 15796// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 15797// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 15798// CHECK: [[TMP7:%.*]]
= bitcast <16 x i8> [[TMP4]] to <2 x i64> 15799// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 15800// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* 15801// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]]) 15802// CHECK: ret void 15803void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) { 15804 vst1q_p64_x2(a, b); 15805} 15806 15807// CHECK-LABEL: define void @test_vst1_u8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 15808// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 15809// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 15810// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 15811// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 15812// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 15813// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 15814// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15815// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 15816// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 15817// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 15818// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 15819// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 15820// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 15821// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) 15822// CHECK: ret void 15823void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { 15824 vst1_u8_x2(a, b); 15825} 15826 15827// CHECK-LABEL: define void @test_vst1_u16_x2(i16* %a, [2 x <4 x
i16>] %b.coerce) #0 { 15828// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 15829// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 15830// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 15831// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 15832// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 15833// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 15834// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15835// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 15836// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 15837// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 15838// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 15839// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 15840// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 15841// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 15842// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 15843// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 15844// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 15845// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 15846// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* 15847// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]]) 15848// CHECK: ret void 15849void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { 15850 vst1_u16_x2(a, b); 15851} 15852 15853// CHECK-LABEL: define void @test_vst1_u32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 15854// CHECK: [[B:%.*]] =
// NOTE(review): generated CHECK expectations for the 64-bit d-register vst1_*_x2 unsigned/signed
// variants. The <8 x i8>-typed 8-bit cases (u8/s8) skip the pointer bitcast round-trip and pass
// i8* %a directly to @llvm.aarch64.neon.st1x2.v8i8.p0i8 — that shortcut is what the generated
// IR produces; keep CHECK lines verbatim.
alloca %struct.uint32x2x2_t, align 8 15855// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 15856// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 15857// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 15858// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 15859// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 15860// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15861// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 15862// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 15863// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 15864// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 15865// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 15866// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 15867// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 15868// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 15869// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 15870// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 15871// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 15872// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* 15873// CHECK: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]]) 15874// CHECK: ret void 15875void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) { 15876 vst1_u32_x2(a, b); 15877} 15878 15879// CHECK-LABEL: define void @test_vst1_u64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 15880// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 15881// CHECK:
[[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 15882// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 15883// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 15884// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* 15885// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* 15886// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15887// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 15888// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 15889// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 15890// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 15891// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 15892// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 15893// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 15894// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 15895// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 15896// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 15897// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 15898// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* 15899// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]]) 15900// CHECK: ret void 15901void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) { 15902 vst1_u64_x2(a, b); 15903} 15904 15905// CHECK-LABEL: define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 15906// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 15907// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 15908//
CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 15909// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 15910// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 15911// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* 15912// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15913// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 15914// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 15915// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 15916// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 15917// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 15918// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 15919// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) 15920// CHECK: ret void 15921void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { 15922 vst1_s8_x2(a, b); 15923} 15924 15925// CHECK-LABEL: define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 15926// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 15927// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 15928// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 15929// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 15930// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 15931// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 15932// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15933// CHECK: [[TMP2:%.*]]
// NOTE(review): generated CHECK expectations for the remaining d-register vst1_*_x2 variants
// (s32/s64/f16/f32/f64 and the start of p8). As with f16 above, the <4 x half> case is lowered
// through <4 x i16> / @llvm.aarch64.neon.st1x2.v4i16, while f32/f64 keep their float vector
// types — exactly as the CHECK lines below require. The p8 block at the end continues past this
// chunk and is reproduced unchanged.
getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 15961// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 15962// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 15963// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 15964// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 15965// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 15966// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 15967// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 15968// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 15969// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 15970// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* 15971// CHECK: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]]) 15972// CHECK: ret void 15973void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) { 15974 vst1_s32_x2(a, b); 15975} 15976 15977// CHECK-LABEL: define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 15978// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 15979// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 15980// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 15981// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 15982// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* 15983// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* 15984// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 15985// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 15986// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t*
[[__S1]], i32 0, i32 0 15987// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 15988// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 15989// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 15990// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 15991// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 15992// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 15993// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 15994// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 15995// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 15996// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* 15997// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]]) 15998// CHECK: ret void 15999void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) { 16000 vst1_s64_x2(a, b); 16001} 16002 16003// CHECK-LABEL: define void @test_vst1_f16_x2(half* %a, [2 x <4 x half>] %b.coerce) #0 { 16004// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 16005// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 16006// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 16007// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 16008// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* 16009// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* 16010// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 16011// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 16012// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 16013// CHECK:
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 16014// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 16015// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 16016// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 16017// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 16018// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 16019// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 16020// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 16021// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 16022// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* 16023// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]]) 16024// CHECK: ret void 16025void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) { 16026 vst1_f16_x2(a, b); 16027} 16028 16029// CHECK-LABEL: define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b.coerce) #0 { 16030// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 16031// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 16032// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 16033// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 16034// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* 16035// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* 16036// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 16037// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 16038// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 16039// CHECK: [[ARRAYIDX:%.*]] =
getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 16040// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 16041// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 16042// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 16043// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 16044// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 16045// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 16046// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 16047// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 16048// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float* 16049// CHECK: call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], float* [[TMP9]]) 16050// CHECK: ret void 16051void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) { 16052 vst1_f32_x2(a, b); 16053} 16054 16055// CHECK-LABEL: define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b.coerce) #0 { 16056// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 16057// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 16058// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 16059// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 16060// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* 16061// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* 16062// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 16063// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 16064// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 16065// CHECK:
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 16066// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 16067// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 16068// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 16069// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 16070// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 16071// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 16072// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 16073// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 16074// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double* 16075// CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]]) 16076// CHECK: ret void 16077void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) { 16078 vst1_f64_x2(a, b); 16079} 16080 16081// CHECK-LABEL: define void @test_vst1_p8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 16082// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 16083// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 16084// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 16085// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 16086// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* 16087// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* 16088// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 16089// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 16090// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x
i8>]* [[VAL]], i64 0, i64 0 16091// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 16092// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 16093// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 16094// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 16095// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) 16096// CHECK: ret void 16097void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { 16098 vst1_p8_x2(a, b); 16099} 16100 16101// CHECK-LABEL: define void @test_vst1_p16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 16102// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 16103// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 16104// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 16105// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 16106// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 16107// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 16108// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 16109// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 16110// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 16111// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 16112// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 16113// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 16114// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 16115// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
// Checks that vst1_p16_x2 lowers to @llvm.aarch64.neon.st1x2.v4i16.
void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
  vst1_p16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
// Checks that vst1_p64_x2 lowers to @llvm.aarch64.neon.st1x2.v1i64.
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// Checks that vst1q_u8_x3 lowers to @llvm.aarch64.neon.st1x3.v16i8.
void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) {
  vst1q_u8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_u16_x3 lowers to @llvm.aarch64.neon.st1x3.v8i16.
void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) {
  vst1q_u16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_u32_x3 lowers to @llvm.aarch64.neon.st1x3.v4i32.
void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
  vst1q_u32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_u64_x3 lowers to @llvm.aarch64.neon.st1x3.v2i64.
void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
  vst1q_u64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// Checks that vst1q_s8_x3 lowers to @llvm.aarch64.neon.st1x3.v16i8.
void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
  vst1q_s8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_s16_x3 lowers to @llvm.aarch64.neon.st1x3.v8i16.
void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
  vst1q_s16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_s32_x3 lowers to @llvm.aarch64.neon.st1x3.v4i32.
void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
  vst1q_s32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_s64_x3 lowers to @llvm.aarch64.neon.st1x3.v2i64.
void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
  vst1q_s64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16_x3(half* %a, [3 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_f16_x3 lowers to @llvm.aarch64.neon.st1x3.v8i16 (half data reinterpreted as i16).
void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
  vst1q_f16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_f32_x3 lowers to @llvm.aarch64.neon.st1x3.v4f32.
void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
  vst1q_f32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
// CHECK: ret void
// Checks that vst1q_f64_x3 lowers to @llvm.aarch64.neon.st1x3.v2f64.
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>],
[3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 16494// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 16495// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) 16496// CHECK: ret void 16497void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) { 16498 vst1q_p8_x3(a, b); 16499} 16500 16501// CHECK-LABEL: define void @test_vst1q_p16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 16502// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 16503// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 16504// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 16505// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 16506// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 16507// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 16508// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 16509// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 16510// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 16511// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 16512// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 16513// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 16514// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 16515// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 16516// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 16517// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 16518// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 
0, i32 0 16519// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 16520// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 16521// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 16522// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 16523// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 16524// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 16525// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* 16526// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) 16527// CHECK: ret void 16528void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { 16529 vst1q_p16_x3(a, b); 16530} 16531 16532// CHECK-LABEL: define void @test_vst1q_p64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 16533// CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 16534// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 16535// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0 16536// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 16537// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* 16538// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* 16539// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 16540// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 16541// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 16542// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 16543// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 16544// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 16545// CHECK: [[VAL1:%.*]] = getelementptr inbounds 
%struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 16546// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 16547// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 16548// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 16549// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 16550// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 16551// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 16552// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 16553// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 16554// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 16555// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 16556// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* 16557// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]]) 16558// CHECK: ret void 16559void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { 16560 vst1q_p64_x3(a, b); 16561} 16562 16563// CHECK-LABEL: define void @test_vst1_u8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 16564// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 16565// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 16566// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 16567// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 16568// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 16569// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 16570// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16571// CHECK: [[VAL:%.*]] = getelementptr inbounds 
// NOTE(review): FileCheck expectations for vst1_{u8,u16,u32,u64}_x3 and the start of vst1_s8_x3 (64-bit D-register st1x3 variants, align 8 / 24-byte memcpy vs. the q-form's align 16 / 48 bytes). CHECK text is matched verbatim against generated IR — do not reword; appears machine-generated, confirm before hand-editing.
%struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 16572// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 16573// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 16574// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 16575// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 16576// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 16577// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 16578// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 16579// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 16580// CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) 16581// CHECK: ret void 16582void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { 16583 vst1_u8_x3(a, b); 16584} 16585 16586// CHECK-LABEL: define void @test_vst1_u16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 16587// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 16588// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 16589// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 16590// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 16591// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 16592// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 16593// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16594// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 16595// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 16596// CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 16597// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 16598// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 16599// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 16600// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 16601// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 16602// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 16603// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 16604// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 16605// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 16606// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 16607// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 16608// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 16609// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 16610// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* 16611// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) 16612// CHECK: ret void 16613void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { 16614 vst1_u16_x3(a, b); 16615} 16616 16617// CHECK-LABEL: define void @test_vst1_u32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 16618// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 16619// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 16620// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 16621// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 16622// CHECK: 
[[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 16623// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 16624// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16625// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 16626// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 16627// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 16628// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 16629// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 16630// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 16631// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 16632// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 16633// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 16634// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 16635// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 16636// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 16637// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 16638// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 16639// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 16640// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 16641// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* 16642// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]]) 16643// CHECK: ret void 16644void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { 16645 vst1_u32_x3(a, b); 16646} 16647 16648// CHECK-LABEL: define 
void @test_vst1_u64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 16649// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 16650// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 16651// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 16652// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 16653// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* 16654// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* 16655// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16656// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 16657// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 16658// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 16659// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 16660// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 16661// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 16662// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 16663// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 16664// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 16665// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 16666// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 16667// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 16668// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 16669// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 16670// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 
x i64> 16671// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 16672// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* 16673// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) 16674// CHECK: ret void 16675void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { 16676 vst1_u64_x3(a, b); 16677} 16678 16679// CHECK-LABEL: define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 16680// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 16681// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 16682// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 16683// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 16684// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 16685// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 16686// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16687// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 16688// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 16689// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 16690// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 16691// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 16692// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 16693// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 16694// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 16695// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 16696// 
// NOTE(review): FileCheck expectations for vst1_{s8,s16,s32,s64,f16}_x3 and the start of vst1_f32_x3. Note f16 lowers through <4 x i16> (st1x3.v4i16) rather than a half vector type. CHECK text is matched verbatim against generated IR — do not reword; appears machine-generated, confirm before hand-editing.
CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) 16697// CHECK: ret void 16698void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { 16699 vst1_s8_x3(a, b); 16700} 16701 16702// CHECK-LABEL: define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 16703// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 16704// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 16705// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 16706// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 16707// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 16708// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 16709// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16710// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 16711// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 16712// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 16713// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 16714// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 16715// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 16716// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 16717// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 16718// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 16719// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 16720// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 16721// CHECK: [[TMP7:%.*]] = load 
<4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 16722// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 16723// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 16724// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 16725// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 16726// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* 16727// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) 16728// CHECK: ret void 16729void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { 16730 vst1_s16_x3(a, b); 16731} 16732 16733// CHECK-LABEL: define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 16734// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 16735// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 16736// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 16737// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 16738// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 16739// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 16740// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16741// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 16742// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 16743// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 16744// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 16745// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 16746// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 16747// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 
16748// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 16749// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 16750// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 16751// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 16752// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 16753// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 16754// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 16755// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 16756// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 16757// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* 16758// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]]) 16759// CHECK: ret void 16760void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { 16761 vst1_s32_x3(a, b); 16762} 16763 16764// CHECK-LABEL: define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 16765// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 16766// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 16767// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 16768// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 16769// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* 16770// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* 16771// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16772// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 16773// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 16774// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x 
i64>]* [[VAL]], i64 0, i64 0 16775// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 16776// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 16777// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 16778// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 16779// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 16780// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 16781// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 16782// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 16783// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 16784// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 16785// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 16786// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 16787// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 16788// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* 16789// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) 16790// CHECK: ret void 16791void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { 16792 vst1_s64_x3(a, b); 16793} 16794 16795// CHECK-LABEL: define void @test_vst1_f16_x3(half* %a, [3 x <4 x half>] %b.coerce) #0 { 16796// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 16797// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 16798// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 16799// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 16800// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 
16801// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 16802// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16803// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 16804// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 16805// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 16806// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 16807// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 16808// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 16809// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1 16810// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 16811// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 16812// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 16813// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2 16814// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 16815// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 16816// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 16817// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 16818// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 16819// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* 16820// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) 16821// CHECK: ret void 16822void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { 16823 vst1_f16_x3(a, b); 16824} 16825 16826// CHECK-LABEL: define void @test_vst1_f32_x3(float* %a, [3 x 
<2 x float>] %b.coerce) #0 { 16827// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 16828// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 16829// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 16830// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 16831// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* 16832// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* 16833// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16834// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 16835// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 16836// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 16837// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 16838// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 16839// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 16840// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1 16841// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 16842// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 16843// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 16844// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2 16845// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 16846// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 16847// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 16848// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> 
[[TMP6]] to <2 x float> 16849// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 16850// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float* 16851// CHECK: call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]]) 16852// CHECK: ret void 16853void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { 16854 vst1_f32_x3(a, b); 16855} 16856 16857// CHECK-LABEL: define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b.coerce) #0 { 16858// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 16859// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 16860// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0 16861// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 16862// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* 16863// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* 16864// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 16865// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 16866// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 16867// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 16868// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 16869// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 16870// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 16871// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1 16872// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 16873// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
// CHECK: ret void
// Tests vst1_f64_x3 -> llvm.aarch64.neon.st1x3.v1f64 (three-register st1 store).
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
// Tests vst1_p8_x3 -> llvm.aarch64.neon.st1x3.v8i8 (no element bitcasts needed for i8).
void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
  vst1_p8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
// Tests vst1_p16_x3 -> llvm.aarch64.neon.st1x3.v4i16.
void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
  vst1_p16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_p64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
// Tests vst1_p64_x3 -> llvm.aarch64.neon.st1x3.v1i64.
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
// Tests vst1q_u8_x4 -> llvm.aarch64.neon.st1x4.v16i8 (four-register st1 store).
void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
  vst1q_u8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
// Tests vst1q_u16_x4 -> llvm.aarch64.neon.st1x4.v8i16.
void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
  vst1q_u16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK: ret void
// Tests vst1q_u32_x4 -> llvm.aarch64.neon.st1x4.v4i32.
void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
  vst1q_u32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
// Tests vst1q_u64_x4 -> llvm.aarch64.neon.st1x4.v2i64.
void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
  vst1q_u64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
// Tests vst1q_s8_x4 -> llvm.aarch64.neon.st1x4.v16i8 (no element bitcasts needed for i8).
void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
  vst1q_s8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
// Tests vst1q_s16_x4 -> llvm.aarch64.neon.st1x4.v8i16.
void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
  vst1q_s16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK: ret void
// Tests vst1q_s32_x4 -> llvm.aarch64.neon.st1x4.v4i32.
void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
  vst1q_s32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
// Tests vst1q_s64_x4 -> llvm.aarch64.neon.st1x4.v2i64.
void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
  vst1q_s64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16_x4(half* %a, [4 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
// Tests vst1q_f16_x4; half vectors are stored through the integer v8i16 st1x4 intrinsic.
void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
  vst1q_f16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
17292// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 17293// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 17294// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 17295// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2 17296// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 17297// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 17298// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 17299// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3 17300// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 17301// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> 17302// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 17303// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 17304// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 17305// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> 17306// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* 17307// CHECK: call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]]) 17308// CHECK: ret void 17309void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { 17310 vst1q_f32_x4(a, b); 17311} 17312 17313// CHECK-LABEL: define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b.coerce) #0 { 17314// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 17315// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 17316// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0 17317// CHECK: store [4 x <2 x 
double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 17318// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* 17319// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* 17320// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 17321// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 17322// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 17323// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 17324// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 17325// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 17326// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 17327// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1 17328// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 17329// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 17330// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 17331// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2 17332// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 17333// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 17334// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 17335// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3 17336// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16 17337// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> 17338// 
CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 17339// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 17340// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 17341// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> 17342// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double* 17343// CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]]) 17344// CHECK: ret void 17345void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { 17346 vst1q_f64_x4(a, b); 17347} 17348 17349// CHECK-LABEL: define void @test_vst1q_p8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 17350// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 17351// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 17352// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 17353// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 17354// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* 17355// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* 17356// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 17357// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 17358// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 17359// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 17360// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 17361// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 17362// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 17363// CHECK: 
[[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 17364// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 17365// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 17366// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 17367// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 17368// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 17369// CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) 17370// CHECK: ret void 17371void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { 17372 vst1q_p8_x4(a, b); 17373} 17374 17375// CHECK-LABEL: define void @test_vst1q_p16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 17376// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 17377// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 17378// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 17379// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 17380// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* 17381// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* 17382// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 17383// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 17384// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 17385// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 17386// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 17387// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> 
[[TMP3]] to <16 x i8> 17388// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 17389// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 17390// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 17391// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 17392// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 17393// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 17394// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 17395// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 17396// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 17397// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 17398// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 17399// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 17400// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 17401// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 17402// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 17403// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 17404// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17405// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) 17406// CHECK: ret void 17407void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { 17408 vst1q_p16_x4(a, b); 17409} 17410 17411// CHECK-LABEL: define void @test_vst1q_p64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 17412// CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 17413// CHECK: [[__S1:%.*]] = 
alloca %struct.poly64x2x4_t, align 16 17414// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0 17415// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 17416// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* 17417// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* 17418// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 17419// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 17420// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 17421// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 17422// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 17423// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 17424// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 17425// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 17426// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 17427// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 17428// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 17429// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 17430// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 17431// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 17432// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 17433// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 17434// CHECK: [[TMP9:%.*]] = load <2 x i64>, 
<2 x i64>* [[ARRAYIDX6]], align 16 17435// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 17436// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 17437// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 17438// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 17439// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 17440// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* 17441// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]]) 17442// CHECK: ret void 17443void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { 17444 vst1q_p64_x4(a, b); 17445} 17446 17447// CHECK-LABEL: define void @test_vst1_u8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 17448// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 17449// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 17450// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 17451// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 17452// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* 17453// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* 17454// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17455// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 17456// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 17457// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 17458// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 17459// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 17460// CHECK: [[TMP3:%.*]] = load <8 x i8>, 
<8 x i8>* [[ARRAYIDX2]], align 8 17461// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 17462// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 17463// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 17464// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 17465// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 17466// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 17467// CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 17468// CHECK: ret void 17469void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { 17470 vst1_u8_x4(a, b); 17471} 17472 17473// CHECK-LABEL: define void @test_vst1_u16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 17474// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 17475// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 17476// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 17477// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 17478// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* 17479// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* 17480// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17481// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 17482// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 17483// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 17484// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 17485// CHECK: [[TMP4:%.*]] = 
bitcast <4 x i16> [[TMP3]] to <8 x i8> 17486// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 17487// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 17488// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 17489// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 17490// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 17491// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 17492// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 17493// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 17494// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 17495// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 17496// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 17497// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 17498// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 17499// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 17500// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 17501// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 17502// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17503// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) 17504// CHECK: ret void 17505void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { 17506 vst1_u16_x4(a, b); 17507} 17508 17509// CHECK-LABEL: define void @test_vst1_u32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 17510// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 17511// CHECK: [[__S1:%.*]] 
= alloca %struct.uint32x2x4_t, align 8 17512// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 17513// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 17514// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* 17515// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* 17516// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17517// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 17518// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 17519// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 17520// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 17521// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 17522// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 17523// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 17524// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 17525// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 17526// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 17527// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 17528// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 17529// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 17530// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 17531// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 17532// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x 
i32>* [[ARRAYIDX6]], align 8 17533// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 17534// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 17535// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 17536// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 17537// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 17538// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* 17539// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]]) 17540// CHECK: ret void 17541void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { 17542 vst1_u32_x4(a, b); 17543} 17544 17545// CHECK-LABEL: define void @test_vst1_u64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 17546// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 17547// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 17548// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 17549// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 17550// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* 17551// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* 17552// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17553// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 17554// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 17555// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 17556// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 17557// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 17558// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 17559// CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 17560// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 17561// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 17562// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 17563// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 17564// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 17565// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 17566// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 17567// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 17568// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 17569// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 17570// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 17571// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 17572// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 17573// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 17574// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* 17575// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) 17576// CHECK: ret void 17577void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { 17578 vst1_u64_x4(a, b); 17579} 17580 17581// CHECK-LABEL: define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 17582// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 17583// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 17584// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 17585// CHECK: store 
[4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 17586// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* 17587// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* 17588// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17589// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 17590// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 17591// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 17592// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 17593// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 17594// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 17595// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 17596// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 17597// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 17598// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 17599// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 17600// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 17601// CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 17602// CHECK: ret void 17603void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { 17604 vst1_s8_x4(a, b); 17605} 17606 17607// CHECK-LABEL: define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 17608// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 17609// CHECK: [[__S1:%.*]] = alloca 
%struct.int16x4x4_t, align 8 17610// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 17611// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 17612// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* 17613// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* 17614// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17615// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 17616// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 17617// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 17618// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 17619// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 17620// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 17621// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 17622// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 17623// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 17624// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 17625// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 17626// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 17627// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 17628// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 17629// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 17630// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], 
align 8 17631// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 17632// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 17633// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 17634// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 17635// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 17636// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17637// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) 17638// CHECK: ret void 17639void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { 17640 vst1_s16_x4(a, b); 17641} 17642 17643// CHECK-LABEL: define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 17644// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 17645// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 17646// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 17647// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 17648// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* 17649// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 17650// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17651// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 17652// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 17653// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 17654// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 17655// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 17656// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 17657// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x 
<2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 17658// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 17659// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 17660// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 17661// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 17662// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 17663// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 17664// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 17665// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 17666// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 17667// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 17668// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 17669// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 17670// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 17671// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 17672// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* 17673// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]]) 17674// CHECK: ret void 17675void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { 17676 vst1_s32_x4(a, b); 17677} 17678 17679// CHECK-LABEL: define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 17680// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 17681// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 17682// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 17683// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* 
[[COERCE_DIVE]], align 8 17684// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* 17685// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* 17686// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17687// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 17688// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 17689// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 17690// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 17691// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 17692// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 17693// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 17694// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 17695// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 17696// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 17697// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 17698// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 17699// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 17700// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 17701// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 17702// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 17703// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 17704// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 17705// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 
17706// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 17707// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 17708// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* 17709// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) 17710// CHECK: ret void 17711void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { 17712 vst1_s64_x4(a, b); 17713} 17714 17715// CHECK-LABEL: define void @test_vst1_f16_x4(half* %a, [4 x <4 x half>] %b.coerce) #0 { 17716// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 17717// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 17718// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 17719// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 17720// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 17721// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 17722// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17723// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 17724// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 17725// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 17726// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 17727// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 17728// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 17729// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1 17730// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 17731// CHECK: [[TMP6:%.*]] = bitcast <4 x half> 
[[TMP5]] to <8 x i8> 17732// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 17733// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2 17734// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 17735// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 17736// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 17737// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3 17738// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 17739// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> 17740// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 17741// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 17742// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 17743// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 17744// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17745// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) 17746// CHECK: ret void 17747void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { 17748 vst1_f16_x4(a, b); 17749} 17750 17751// CHECK-LABEL: define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b.coerce) #0 { 17752// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 17753// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 17754// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 17755// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 17756// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 17757// CHECK: [[TMP1:%.*]] = bitcast 
%struct.float32x2x4_t* [[B]] to i8* 17758// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17759// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 17760// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17761// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 17762// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 17763// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 17764// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17765// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 17766// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 17767// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 17768// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17769// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 17770// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 17771// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 17772// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17773// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 17774// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 17775// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> 17776// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 17777// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 17778// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 
17779// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> 17780// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* 17781// CHECK: call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]]) 17782// CHECK: ret void 17783void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { 17784 vst1_f32_x4(a, b); 17785} 17786 17787// CHECK-LABEL: define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b.coerce) #0 { 17788// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 17789// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 17790// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 17791// CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 17792// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* 17793// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* 17794// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17795// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 17796// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17797// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 17798// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 17799// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 17800// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17801// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 17802// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 17803// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 
17804// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17805// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 17806// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 17807// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 17808// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17809// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 17810// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 17811// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> 17812// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 17813// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 17814// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 17815// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> 17816// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double* 17817// CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]]) 17818// CHECK: ret void 17819void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { 17820 vst1_f64_x4(a, b); 17821} 17822 17823// CHECK-LABEL: define void @test_vst1_p8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 17824// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 17825// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 17826// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 17827// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 17828// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 17829// CHECK: [[TMP1:%.*]] = 
bitcast %struct.poly8x8x4_t* [[B]] to i8* 17830// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17831// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17832// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 17833// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 17834// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17835// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 17836// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 17837// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17838// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 17839// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 17840// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17841// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 17842// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 17843// CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 17844// CHECK: ret void 17845void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { 17846 vst1_p8_x4(a, b); 17847} 17848 17849// CHECK-LABEL: define void @test_vst1_p16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 17850// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 17851// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 17852// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 17853// 
CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 17854// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 17855// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 17856// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17857// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 17858// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17859// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 17860// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 17861// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 17862// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17863// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 17864// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 17865// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 17866// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17867// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 17868// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 17869// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 17870// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17871// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 17872// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 17873// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 17874// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 
17875// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 17876// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 17877// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 17878// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17879// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) 17880// CHECK: ret void 17881void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { 17882 vst1_p16_x4(a, b); 17883} 17884 17885// CHECK-LABEL: define void @test_vst1_p64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 17886// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 17887// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 17888// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0 17889// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 17890// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* 17891// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* 17892// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17893// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 17894// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17895// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 17896// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 17897// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 17898// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17899// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 17900// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 
17901// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 17902// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17903// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 17904// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 17905// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 17906// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17907// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 17908// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 17909// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 17910// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 17911// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 17912// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 17913// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 17914// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* 17915// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) 17916// CHECK: ret void 17917void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { 17918 vst1_p64_x4(a, b); 17919} 17920 17921// CHECK-LABEL: define i64 @test_vceqd_s64(i64 %a, i64 %b) #0 { 17922// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b 17923// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 17924// CHECK: ret i64 [[VCEQD_I]] 17925int64_t test_vceqd_s64(int64_t a, int64_t b) { 17926 return (int64_t)vceqd_s64(a, b); 17927} 17928 17929// CHECK-LABEL: define i64 @test_vceqd_u64(i64 %a, i64 %b) #0 { 17930// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b 17931// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 17932// CHECK: ret i64 [[VCEQD_I]] 17933uint64_t 
test_vceqd_u64(uint64_t a, uint64_t b) {
  return (int64_t)vceqd_u64(a, b);
}

// Scalar compare intrinsics, one test per intrinsic.  The "// CHECK" patterns
// below are generated from clang's IR output and pin the exact instructions
// emitted; regenerate them with the update script rather than hand-editing.
// (This span was extracted with hard line-wrapping; the one-directive-per-line
// layout of the original test file is restored here.)
// CHECK-LABEL: define i64 @test_vceqzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_u64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZD_I]]
// Fixed: vceqzd_u64 is the unsigned intrinsic (uint64_t -> uint64_t per ACLE);
// the test previously used int64_t, unlike the sibling unsigned tests
// (test_vceqd_u64, test_vcged_u64).  Both types lower to i64, so the
// CHECK-LABEL signature above is unchanged.
uint64_t test_vceqzd_u64(uint64_t a) {
  return (uint64_t)vceqzd_u64(a);
}

// CHECK-LABEL: define i64 @test_vcged_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgezd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcgtd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_u64(i64 %a, i64 %b) #0 {
// NOTE(review): extraction hard-wrapped this generated FileCheck test; the
// original one-directive-per-line layout is restored below (comments added,
// all CHECK patterns and code tokens unchanged).  The [[VCEQD_I]] variable
// name recurring in the non-eq tests is an artifact of the check generator.
// CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcled_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vclezd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcltd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}

// Scalar compare-less-than-zero, bit-test, absolute-value and saturating-abs
// tests.  CHECK patterns are generated; do not hand-edit.
// CHECK-LABEL: define i64 @test_vcltzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vtstd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = and i64 %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK: ret i64 [[VTSTD_I]]
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vtstd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = and i64 %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK: ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vabsd_s64(i64 %a) #0 {
// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4
// CHECK: ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}

// The 8/16-bit scalar saturating ops lower through a vector op: insert the
// scalar into lane 0, apply the vector intrinsic, extract lane 0.
// CHECK-LABEL: define i8 @test_vqabsb_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}

// NOTE(review): the pattern of the next CHECK line continues on the following
// physical line — an artifact of the extraction's hard wrapping.
// CHECK-LABEL: define i16 @test_vqabsh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK:
[[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4 18086// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0 18087// CHECK: ret i16 [[TMP1]] 18088int16_t test_vqabsh_s16(int16_t a) { 18089 return (int16_t)vqabsh_s16(a); 18090} 18091 18092// CHECK-LABEL: define i32 @test_vqabss_s32(i32 %a) #0 { 18093// CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4 18094// CHECK: ret i32 [[VQABSS_S32_I]] 18095int32_t test_vqabss_s32(int32_t a) { 18096 return (int32_t)vqabss_s32(a); 18097} 18098 18099// CHECK-LABEL: define i64 @test_vqabsd_s64(i64 %a) #0 { 18100// CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4 18101// CHECK: ret i64 [[VQABSD_S64_I]] 18102int64_t test_vqabsd_s64(int64_t a) { 18103 return (int64_t)vqabsd_s64(a); 18104} 18105 18106// CHECK-LABEL: define i64 @test_vnegd_s64(i64 %a) #0 { 18107// CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a 18108// CHECK: ret i64 [[VNEGD_I]] 18109int64_t test_vnegd_s64(int64_t a) { 18110 return (int64_t)vnegd_s64(a); 18111} 18112 18113// CHECK-LABEL: define i8 @test_vqnegb_s8(i8 %a) #0 { 18114// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 18115// CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4 18116// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0 18117// CHECK: ret i8 [[TMP1]] 18118int8_t test_vqnegb_s8(int8_t a) { 18119 return (int8_t)vqnegb_s8(a); 18120} 18121 18122// CHECK-LABEL: define i16 @test_vqnegh_s16(i16 %a) #0 { 18123// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 18124// CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4 18125// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0 18126// CHECK: ret i16 [[TMP1]] 18127int16_t test_vqnegh_s16(int16_t a) { 18128 return (int16_t)vqnegh_s16(a); 18129} 18130 18131// CHECK-LABEL: define i32 
@test_vqnegs_s32(i32 %a) #0 { 18132// CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4 18133// CHECK: ret i32 [[VQNEGS_S32_I]] 18134int32_t test_vqnegs_s32(int32_t a) { 18135 return (int32_t)vqnegs_s32(a); 18136} 18137 18138// CHECK-LABEL: define i64 @test_vqnegd_s64(i64 %a) #0 { 18139// CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4 18140// CHECK: ret i64 [[VQNEGD_S64_I]] 18141int64_t test_vqnegd_s64(int64_t a) { 18142 return (int64_t)vqnegd_s64(a); 18143} 18144 18145// CHECK-LABEL: define i8 @test_vuqaddb_s8(i8 %a, i8 %b) #0 { 18146// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 18147// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 18148// CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 18149// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0 18150// CHECK: ret i8 [[TMP2]] 18151int8_t test_vuqaddb_s8(int8_t a, int8_t b) { 18152 return (int8_t)vuqaddb_s8(a, b); 18153} 18154 18155// CHECK-LABEL: define i16 @test_vuqaddh_s16(i16 %a, i16 %b) #0 { 18156// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 18157// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 18158// CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 18159// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0 18160// CHECK: ret i16 [[TMP2]] 18161int16_t test_vuqaddh_s16(int16_t a, int16_t b) { 18162 return (int16_t)vuqaddh_s16(a, b); 18163} 18164 18165// CHECK-LABEL: define i32 @test_vuqadds_s32(i32 %a, i32 %b) #0 { 18166// CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4 18167// CHECK: ret i32 [[VUQADDS_S32_I]] 18168int32_t test_vuqadds_s32(int32_t a, int32_t b) { 18169 return (int32_t)vuqadds_s32(a, b); 18170} 18171 18172// CHECK-LABEL: define i64 
@test_vuqaddd_s64(i64 %a, i64 %b) #0 { 18173// CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4 18174// CHECK: ret i64 [[VUQADDD_S64_I]] 18175int64_t test_vuqaddd_s64(int64_t a, int64_t b) { 18176 return (int64_t)vuqaddd_s64(a, b); 18177} 18178 18179// CHECK-LABEL: define i8 @test_vsqaddb_u8(i8 %a, i8 %b) #0 { 18180// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 18181// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 18182// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 18183// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0 18184// CHECK: ret i8 [[TMP2]] 18185uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) { 18186 return (uint8_t)vsqaddb_u8(a, b); 18187} 18188 18189// CHECK-LABEL: define i16 @test_vsqaddh_u16(i16 %a, i16 %b) #0 { 18190// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 18191// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 18192// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 18193// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0 18194// CHECK: ret i16 [[TMP2]] 18195uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) { 18196 return (uint16_t)vsqaddh_u16(a, b); 18197} 18198 18199// CHECK-LABEL: define i32 @test_vsqadds_u32(i32 %a, i32 %b) #0 { 18200// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4 18201// CHECK: ret i32 [[VSQADDS_U32_I]] 18202uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) { 18203 return (uint32_t)vsqadds_u32(a, b); 18204} 18205 18206// CHECK-LABEL: define i64 @test_vsqaddd_u64(i64 %a, i64 %b) #0 { 18207// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4 18208// CHECK: ret i64 [[VSQADDD_U64_I]] 18209uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) { 
18210 return (uint64_t)vsqaddd_u64(a, b); 18211} 18212 18213// CHECK-LABEL: define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) #0 { 18214// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 18215// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0 18216// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 18217// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 18218// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4 18219// CHECK: ret i32 [[VQDMLXL1_I]] 18220int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { 18221 18222 return (int32_t)vqdmlalh_s16(a, b, c); 18223} 18224 18225// CHECK-LABEL: define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) #0 { 18226// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4 18227// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4 18228// CHECK: ret i64 [[VQDMLXL1_I]] 18229int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { 18230 return (int64_t)vqdmlals_s32(a, b, c); 18231} 18232 18233// CHECK-LABEL: define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) #0 { 18234// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 18235// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0 18236// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 18237// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 18238// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4 18239// CHECK: ret i32 [[VQDMLXL1_I]] 18240int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { 18241 18242 return (int32_t)vqdmlslh_s16(a, b, c); 18243} 18244 18245// CHECK-LABEL: define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) #0 { 18246// 
CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4 18247// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4 18248// CHECK: ret i64 [[VQDMLXL1_I]] 18249int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { 18250 return (int64_t)vqdmlsls_s32(a, b, c); 18251} 18252 18253// CHECK-LABEL: define i32 @test_vqdmullh_s16(i16 %a, i16 %b) #0 { 18254// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 18255// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 18256// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 18257// CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0 18258// CHECK: ret i32 [[TMP2]] 18259int32_t test_vqdmullh_s16(int16_t a, int16_t b) { 18260 return (int32_t)vqdmullh_s16(a, b); 18261} 18262 18263// CHECK-LABEL: define i64 @test_vqdmulls_s32(i32 %a, i32 %b) #0 { 18264// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4 18265// CHECK: ret i64 [[VQDMULLS_S32_I]] 18266int64_t test_vqdmulls_s32(int32_t a, int32_t b) { 18267 return (int64_t)vqdmulls_s32(a, b); 18268} 18269 18270// CHECK-LABEL: define i8 @test_vqmovunh_s16(i16 %a) #0 { 18271// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 18272// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4 18273// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0 18274// CHECK: ret i8 [[TMP1]] 18275int8_t test_vqmovunh_s16(int16_t a) { 18276 return (int8_t)vqmovunh_s16(a); 18277} 18278 18279// CHECK-LABEL: define i16 @test_vqmovuns_s32(i32 %a) #0 { 18280// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 18281// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4 18282// CHECK: [[TMP1:%.*]] = 
extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0 18283// CHECK: ret i16 [[TMP1]] 18284int16_t test_vqmovuns_s32(int32_t a) { 18285 return (int16_t)vqmovuns_s32(a); 18286} 18287 18288// CHECK-LABEL: define i32 @test_vqmovund_s64(i64 %a) #0 { 18289// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4 18290// CHECK: ret i32 [[VQMOVUND_S64_I]] 18291int32_t test_vqmovund_s64(int64_t a) { 18292 return (int32_t)vqmovund_s64(a); 18293} 18294 18295// CHECK-LABEL: define i8 @test_vqmovnh_s16(i16 %a) #0 { 18296// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 18297// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4 18298// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0 18299// CHECK: ret i8 [[TMP1]] 18300int8_t test_vqmovnh_s16(int16_t a) { 18301 return (int8_t)vqmovnh_s16(a); 18302} 18303 18304// CHECK-LABEL: define i16 @test_vqmovns_s32(i32 %a) #0 { 18305// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 18306// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4 18307// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0 18308// CHECK: ret i16 [[TMP1]] 18309int16_t test_vqmovns_s32(int32_t a) { 18310 return (int16_t)vqmovns_s32(a); 18311} 18312 18313// CHECK-LABEL: define i32 @test_vqmovnd_s64(i64 %a) #0 { 18314// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4 18315// CHECK: ret i32 [[VQMOVND_S64_I]] 18316int32_t test_vqmovnd_s64(int64_t a) { 18317 return (int32_t)vqmovnd_s64(a); 18318} 18319 18320// CHECK-LABEL: define i8 @test_vqmovnh_u16(i16 %a) #0 { 18321// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 18322// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4 18323// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0 
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_u64(i64 %a) #0 {
// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVND_U64_I]]
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}

// Scalar floating-point compares lower to an fcmp whose i1 result is
// sign-extended to an all-ones/all-zeros integer mask.
// CHECK-LABEL: define i32 @test_vceqs_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vceqd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vceqzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcges_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgezs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgezd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcgts_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgtzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgtzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcles_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vclezs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vclezd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vclts_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcltzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcltzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}

// Scalar floating-point absolute compares map directly onto the
// facge/facgt intrinsics.
// CHECK-LABEL: define i32 @test_vcages_f32(float %a, float %b) #0 {
// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4
// CHECK: ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaged_f64(double %a, double %b) #0 {
// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4
// CHECK: ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcagts_f32(float %a, float %b) #0 {
// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4
// CHECK: ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcagtd_f64(double %a, double %b) #0 {
// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4
// CHECK: ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// vcale/vcalt are emitted as facge/facgt with the operands swapped.
// CHECK-LABEL: define i32 @test_vcales_f32(float %a, float %b) #0 {
// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4
// CHECK: ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaled_f64(double %a, double %b) #0 {
// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4
// CHECK: ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcalts_f32(float %a, float %b) #0 {
// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4
// CHECK: ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaltd_f64(double %a, double %b) #0 {
// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4
// CHECK: ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}

// Shift-right by immediate: plain shifts lower to ashr/lshr; rounding shifts
// lower to srshl/urshl with a negated shift amount.
// CHECK-LABEL: define i64 @test_vshrd_n_s64(i64 %a) #0 {
// CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK: ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// An unsigned shift right by 64 constant-folds to zero.
// CHECK-LABEL: define i64 @test_vshrd_n_u64(i64 %a) #0 {
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {

  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64_2() #0 {
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64_2() {

  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: define i64 @test_vrshrd_n_s64(i64 %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vrshrd_n_u64(i64 %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// Shift-right-and-accumulate: shift the addend, then add.
// CHECK-LABEL: define i64 @test_vsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// An unsigned shift right by 64 contributes nothing, so only %a survives.
// CHECK-LABEL: define i64 @test_vsrad_n_u64_2(i64 %a, i64 %b) #0 {
// CHECK: ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {

  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vshld_n_s64(i64 %a) #0 {
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK: ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}
// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshld_n_u64(i64 %a) #0 {
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK: ret i64 [[SHLD_N]]
uint64_t
test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// Scalar saturating shift-left by immediate: the i8/i16 variants are done on
// lane 0 of a vector op, and only lane 0 of the shift-amount vector is
// defined (the rest are undef).
// CHECK-LABEL: define i8 @test_vqshlb_n_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_s32(i32 %a) #0 {
// CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_s64(i64 %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: define i8 @test_vqshlb_n_u8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_u32(i32 %a) #0 {
// CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return
(uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_u64(i64 %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// Saturating shift-left unsigned (signed input, unsigned saturation): sqshlu.
// CHECK-LABEL: define i8 @test_vqshlub_n_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshluh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshlus_n_s32(i32 %a) #0 {
// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshlud_n_s64(i64 %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// Shift-right/left-and-insert: scalar operands are bitcast to <1 x i64>
// around the vector vsri/vsli intrinsic and back.
// CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK: ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK: ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK: ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK: ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// Saturating (and rounding) shift-right-narrow by immediate; narrow variants
// go through a vector op on lane 0.
// CHECK-LABEL: define i8 @test_vqshrnh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshrns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrnd_n_s64(i64 %a) #0 {
// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqshrnh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshrns_n_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrnd_n_u64(i64 %a) #0 {
// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrnh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshrns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrnd_n_s64(i64 %a) #0 {
// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrnh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

//
CHECK-LABEL: define i16 @test_vqrshrns_n_u32(i32 %a) #0 { 19161// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 19162// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) 19163// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0 19164// CHECK: ret i16 [[TMP1]] 19165uint16_t test_vqrshrns_n_u32(uint32_t a) { 19166 return (uint16_t)vqrshrns_n_u32(a, 16); 19167} 19168 19169// CHECK-LABEL: define i32 @test_vqrshrnd_n_u64(i64 %a) #0 { 19170// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32) 19171// CHECK: ret i32 [[VQRSHRND_N_U64]] 19172uint32_t test_vqrshrnd_n_u64(uint64_t a) { 19173 return (uint32_t)vqrshrnd_n_u64(a, 32); 19174} 19175 19176// CHECK-LABEL: define i8 @test_vqshrunh_n_s16(i16 %a) #0 { 19177// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 19178// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8) 19179// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0 19180// CHECK: ret i8 [[TMP1]] 19181int8_t test_vqshrunh_n_s16(int16_t a) { 19182 return (int8_t)vqshrunh_n_s16(a, 8); 19183} 19184 19185// CHECK-LABEL: define i16 @test_vqshruns_n_s32(i32 %a) #0 { 19186// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 19187// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16) 19188// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0 19189// CHECK: ret i16 [[TMP1]] 19190int16_t test_vqshruns_n_s32(int32_t a) { 19191 return (int16_t)vqshruns_n_s32(a, 16); 19192} 19193 19194// CHECK-LABEL: define i32 @test_vqshrund_n_s64(i64 %a) #0 { 19195// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32) 19196// CHECK: ret i32 [[VQSHRUND_N_S64]] 19197int32_t test_vqshrund_n_s64(int64_t a) { 19198 return 
(int32_t)vqshrund_n_s64(a, 32); 19199} 19200 19201// CHECK-LABEL: define i8 @test_vqrshrunh_n_s16(i16 %a) #0 { 19202// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0 19203// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8) 19204// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0 19205// CHECK: ret i8 [[TMP1]] 19206int8_t test_vqrshrunh_n_s16(int16_t a) { 19207 return (int8_t)vqrshrunh_n_s16(a, 8); 19208} 19209 19210// CHECK-LABEL: define i16 @test_vqrshruns_n_s32(i32 %a) #0 { 19211// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0 19212// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16) 19213// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0 19214// CHECK: ret i16 [[TMP1]] 19215int16_t test_vqrshruns_n_s32(int32_t a) { 19216 return (int16_t)vqrshruns_n_s32(a, 16); 19217} 19218 19219// CHECK-LABEL: define i32 @test_vqrshrund_n_s64(i64 %a) #0 { 19220// CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32) 19221// CHECK: ret i32 [[VQRSHRUND_N_S64]] 19222int32_t test_vqrshrund_n_s64(int64_t a) { 19223 return (int32_t)vqrshrund_n_s64(a, 32); 19224} 19225 19226// CHECK-LABEL: define float @test_vcvts_n_f32_s32(i32 %a) #0 { 19227// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1) 19228// CHECK: ret float [[VCVTS_N_F32_S32]] 19229float32_t test_vcvts_n_f32_s32(int32_t a) { 19230 return vcvts_n_f32_s32(a, 1); 19231} 19232 19233// CHECK-LABEL: define double @test_vcvtd_n_f64_s64(i64 %a) #0 { 19234// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1) 19235// CHECK: ret double [[VCVTD_N_F64_S64]] 19236float64_t test_vcvtd_n_f64_s64(int64_t a) { 19237 return vcvtd_n_f64_s64(a, 1); 19238} 19239 19240// CHECK-LABEL: define float 
@test_vcvts_n_f32_u32(i32 %a) #0 { 19241// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32) 19242// CHECK: ret float [[VCVTS_N_F32_U32]] 19243float32_t test_vcvts_n_f32_u32(uint32_t a) { 19244 return vcvts_n_f32_u32(a, 32); 19245} 19246 19247// CHECK-LABEL: define double @test_vcvtd_n_f64_u64(i64 %a) #0 { 19248// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64) 19249// CHECK: ret double [[VCVTD_N_F64_U64]] 19250float64_t test_vcvtd_n_f64_u64(uint64_t a) { 19251 return vcvtd_n_f64_u64(a, 64); 19252} 19253 19254// CHECK-LABEL: define i32 @test_vcvts_n_s32_f32(float %a) #0 { 19255// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1) 19256// CHECK: ret i32 [[VCVTS_N_S32_F32]] 19257int32_t test_vcvts_n_s32_f32(float32_t a) { 19258 return (int32_t)vcvts_n_s32_f32(a, 1); 19259} 19260 19261// CHECK-LABEL: define i64 @test_vcvtd_n_s64_f64(double %a) #0 { 19262// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1) 19263// CHECK: ret i64 [[VCVTD_N_S64_F64]] 19264int64_t test_vcvtd_n_s64_f64(float64_t a) { 19265 return (int64_t)vcvtd_n_s64_f64(a, 1); 19266} 19267 19268// CHECK-LABEL: define i32 @test_vcvts_n_u32_f32(float %a) #0 { 19269// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32) 19270// CHECK: ret i32 [[VCVTS_N_U32_F32]] 19271uint32_t test_vcvts_n_u32_f32(float32_t a) { 19272 return (uint32_t)vcvts_n_u32_f32(a, 32); 19273} 19274 19275// CHECK-LABEL: define i64 @test_vcvtd_n_u64_f64(double %a) #0 { 19276// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64) 19277// CHECK: ret i64 [[VCVTD_N_U64_F64]] 19278uint64_t test_vcvtd_n_u64_f64(float64_t a) { 19279 return (uint64_t)vcvtd_n_u64_f64(a, 64); 19280} 19281 19282// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 { 
19283// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 19284// CHECK: ret <8 x i8> [[TMP0]] 19285int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { 19286 return vreinterpret_s8_s16(a); 19287} 19288 19289// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 { 19290// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 19291// CHECK: ret <8 x i8> [[TMP0]] 19292int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { 19293 return vreinterpret_s8_s32(a); 19294} 19295 19296// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 { 19297// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 19298// CHECK: ret <8 x i8> [[TMP0]] 19299int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { 19300 return vreinterpret_s8_s64(a); 19301} 19302 19303// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 { 19304// CHECK: ret <8 x i8> %a 19305int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { 19306 return vreinterpret_s8_u8(a); 19307} 19308 19309// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 { 19310// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 19311// CHECK: ret <8 x i8> [[TMP0]] 19312int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { 19313 return vreinterpret_s8_u16(a); 19314} 19315 19316// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 { 19317// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 19318// CHECK: ret <8 x i8> [[TMP0]] 19319int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { 19320 return vreinterpret_s8_u32(a); 19321} 19322 19323// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 { 19324// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 19325// CHECK: ret <8 x i8> [[TMP0]] 19326int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { 19327 return vreinterpret_s8_u64(a); 19328} 19329 19330// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 { 19331// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 
19332// CHECK: ret <8 x i8> [[TMP0]] 19333int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { 19334 return vreinterpret_s8_f16(a); 19335} 19336 19337// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 { 19338// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 19339// CHECK: ret <8 x i8> [[TMP0]] 19340int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { 19341 return vreinterpret_s8_f32(a); 19342} 19343 19344// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f64(<1 x double> %a) #0 { 19345// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 19346// CHECK: ret <8 x i8> [[TMP0]] 19347int8x8_t test_vreinterpret_s8_f64(float64x1_t a) { 19348 return vreinterpret_s8_f64(a); 19349} 19350 19351// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 { 19352// CHECK: ret <8 x i8> %a 19353int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { 19354 return vreinterpret_s8_p8(a); 19355} 19356 19357// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 { 19358// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 19359// CHECK: ret <8 x i8> [[TMP0]] 19360int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { 19361 return vreinterpret_s8_p16(a); 19362} 19363 19364// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p64(<1 x i64> %a) #0 { 19365// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 19366// CHECK: ret <8 x i8> [[TMP0]] 19367int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) { 19368 return vreinterpret_s8_p64(a); 19369} 19370 19371// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 { 19372// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 19373// CHECK: ret <4 x i16> [[TMP0]] 19374int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { 19375 return vreinterpret_s16_s8(a); 19376} 19377 19378// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 { 19379// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 19380// CHECK: ret <4 x i16> [[TMP0]] 
19381int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { 19382 return vreinterpret_s16_s32(a); 19383} 19384 19385// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 { 19386// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 19387// CHECK: ret <4 x i16> [[TMP0]] 19388int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { 19389 return vreinterpret_s16_s64(a); 19390} 19391 19392// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 { 19393// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 19394// CHECK: ret <4 x i16> [[TMP0]] 19395int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { 19396 return vreinterpret_s16_u8(a); 19397} 19398 19399// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 { 19400// CHECK: ret <4 x i16> %a 19401int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { 19402 return vreinterpret_s16_u16(a); 19403} 19404 19405// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 { 19406// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 19407// CHECK: ret <4 x i16> [[TMP0]] 19408int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { 19409 return vreinterpret_s16_u32(a); 19410} 19411 19412// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 { 19413// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 19414// CHECK: ret <4 x i16> [[TMP0]] 19415int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { 19416 return vreinterpret_s16_u64(a); 19417} 19418 19419// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 { 19420// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> 19421// CHECK: ret <4 x i16> [[TMP0]] 19422int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { 19423 return vreinterpret_s16_f16(a); 19424} 19425 19426// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 { 19427// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> 19428// CHECK: ret <4 x i16> [[TMP0]] 
19429int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { 19430 return vreinterpret_s16_f32(a); 19431} 19432 19433// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f64(<1 x double> %a) #0 { 19434// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> 19435// CHECK: ret <4 x i16> [[TMP0]] 19436int16x4_t test_vreinterpret_s16_f64(float64x1_t a) { 19437 return vreinterpret_s16_f64(a); 19438} 19439 19440// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 { 19441// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 19442// CHECK: ret <4 x i16> [[TMP0]] 19443int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { 19444 return vreinterpret_s16_p8(a); 19445} 19446 19447// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 { 19448// CHECK: ret <4 x i16> %a 19449int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { 19450 return vreinterpret_s16_p16(a); 19451} 19452 19453// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p64(<1 x i64> %a) #0 { 19454// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 19455// CHECK: ret <4 x i16> [[TMP0]] 19456int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) { 19457 return vreinterpret_s16_p64(a); 19458} 19459 19460// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 { 19461// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 19462// CHECK: ret <2 x i32> [[TMP0]] 19463int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { 19464 return vreinterpret_s32_s8(a); 19465} 19466 19467// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 { 19468// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 19469// CHECK: ret <2 x i32> [[TMP0]] 19470int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { 19471 return vreinterpret_s32_s16(a); 19472} 19473 19474// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 { 19475// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 19476// CHECK: ret <2 x i32> [[TMP0]] 19477int32x2_t 
test_vreinterpret_s32_s64(int64x1_t a) { 19478 return vreinterpret_s32_s64(a); 19479} 19480 19481// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 { 19482// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 19483// CHECK: ret <2 x i32> [[TMP0]] 19484int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { 19485 return vreinterpret_s32_u8(a); 19486} 19487 19488// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 { 19489// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 19490// CHECK: ret <2 x i32> [[TMP0]] 19491int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { 19492 return vreinterpret_s32_u16(a); 19493} 19494 19495// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 { 19496// CHECK: ret <2 x i32> %a 19497int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { 19498 return vreinterpret_s32_u32(a); 19499} 19500 19501// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 { 19502// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 19503// CHECK: ret <2 x i32> [[TMP0]] 19504int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { 19505 return vreinterpret_s32_u64(a); 19506} 19507 19508// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 { 19509// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> 19510// CHECK: ret <2 x i32> [[TMP0]] 19511int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { 19512 return vreinterpret_s32_f16(a); 19513} 19514 19515// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 { 19516// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> 19517// CHECK: ret <2 x i32> [[TMP0]] 19518int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { 19519 return vreinterpret_s32_f32(a); 19520} 19521 19522// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f64(<1 x double> %a) #0 { 19523// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> 19524// CHECK: ret <2 x i32> [[TMP0]] 19525int32x2_t 
test_vreinterpret_s32_f64(float64x1_t a) { 19526 return vreinterpret_s32_f64(a); 19527} 19528 19529// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 { 19530// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 19531// CHECK: ret <2 x i32> [[TMP0]] 19532int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { 19533 return vreinterpret_s32_p8(a); 19534} 19535 19536// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 { 19537// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 19538// CHECK: ret <2 x i32> [[TMP0]] 19539int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { 19540 return vreinterpret_s32_p16(a); 19541} 19542 19543// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p64(<1 x i64> %a) #0 { 19544// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 19545// CHECK: ret <2 x i32> [[TMP0]] 19546int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) { 19547 return vreinterpret_s32_p64(a); 19548} 19549 19550// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 { 19551// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 19552// CHECK: ret <1 x i64> [[TMP0]] 19553int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { 19554 return vreinterpret_s64_s8(a); 19555} 19556 19557// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 { 19558// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 19559// CHECK: ret <1 x i64> [[TMP0]] 19560int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { 19561 return vreinterpret_s64_s16(a); 19562} 19563 19564// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 { 19565// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 19566// CHECK: ret <1 x i64> [[TMP0]] 19567int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { 19568 return vreinterpret_s64_s32(a); 19569} 19570 19571// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 { 19572// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 19573// 
CHECK: ret <1 x i64> [[TMP0]] 19574int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { 19575 return vreinterpret_s64_u8(a); 19576} 19577 19578// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 { 19579// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 19580// CHECK: ret <1 x i64> [[TMP0]] 19581int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { 19582 return vreinterpret_s64_u16(a); 19583} 19584 19585// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 { 19586// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 19587// CHECK: ret <1 x i64> [[TMP0]] 19588int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { 19589 return vreinterpret_s64_u32(a); 19590} 19591 19592// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 { 19593// CHECK: ret <1 x i64> %a 19594int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { 19595 return vreinterpret_s64_u64(a); 19596} 19597 19598// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 { 19599// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> 19600// CHECK: ret <1 x i64> [[TMP0]] 19601int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { 19602 return vreinterpret_s64_f16(a); 19603} 19604 19605// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 { 19606// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> 19607// CHECK: ret <1 x i64> [[TMP0]] 19608int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { 19609 return vreinterpret_s64_f32(a); 19610} 19611 19612// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f64(<1 x double> %a) #0 { 19613// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> 19614// CHECK: ret <1 x i64> [[TMP0]] 19615int64x1_t test_vreinterpret_s64_f64(float64x1_t a) { 19616 return vreinterpret_s64_f64(a); 19617} 19618 19619// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 { 19620// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 19621// 
CHECK: ret <1 x i64> [[TMP0]] 19622int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { 19623 return vreinterpret_s64_p8(a); 19624} 19625 19626// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 { 19627// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 19628// CHECK: ret <1 x i64> [[TMP0]] 19629int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { 19630 return vreinterpret_s64_p16(a); 19631} 19632 19633// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p64(<1 x i64> %a) #0 { 19634// CHECK: ret <1 x i64> %a 19635int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) { 19636 return vreinterpret_s64_p64(a); 19637} 19638 19639// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 { 19640// CHECK: ret <8 x i8> %a 19641uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { 19642 return vreinterpret_u8_s8(a); 19643} 19644 19645// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 { 19646// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 19647// CHECK: ret <8 x i8> [[TMP0]] 19648uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { 19649 return vreinterpret_u8_s16(a); 19650} 19651 19652// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 { 19653// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 19654// CHECK: ret <8 x i8> [[TMP0]] 19655uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { 19656 return vreinterpret_u8_s32(a); 19657} 19658 19659// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 { 19660// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 19661// CHECK: ret <8 x i8> [[TMP0]] 19662uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { 19663 return vreinterpret_u8_s64(a); 19664} 19665 19666// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 { 19667// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 19668// CHECK: ret <8 x i8> [[TMP0]] 19669uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { 19670 return 
vreinterpret_u8_u16(a); 19671} 19672 19673// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 { 19674// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 19675// CHECK: ret <8 x i8> [[TMP0]] 19676uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { 19677 return vreinterpret_u8_u32(a); 19678} 19679 19680// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 { 19681// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 19682// CHECK: ret <8 x i8> [[TMP0]] 19683uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { 19684 return vreinterpret_u8_u64(a); 19685} 19686 19687// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 { 19688// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 19689// CHECK: ret <8 x i8> [[TMP0]] 19690uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { 19691 return vreinterpret_u8_f16(a); 19692} 19693 19694// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 { 19695// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 19696// CHECK: ret <8 x i8> [[TMP0]] 19697uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { 19698 return vreinterpret_u8_f32(a); 19699} 19700 19701// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f64(<1 x double> %a) #0 { 19702// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 19703// CHECK: ret <8 x i8> [[TMP0]] 19704uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) { 19705 return vreinterpret_u8_f64(a); 19706} 19707 19708// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 { 19709// CHECK: ret <8 x i8> %a 19710uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { 19711 return vreinterpret_u8_p8(a); 19712} 19713 19714// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 { 19715// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 19716// CHECK: ret <8 x i8> [[TMP0]] 19717uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { 19718 return vreinterpret_u8_p16(a); 19719} 19720 
19721// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p64(<1 x i64> %a) #0 { 19722// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 19723// CHECK: ret <8 x i8> [[TMP0]] 19724uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) { 19725 return vreinterpret_u8_p64(a); 19726} 19727 19728// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 { 19729// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 19730// CHECK: ret <4 x i16> [[TMP0]] 19731uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { 19732 return vreinterpret_u16_s8(a); 19733} 19734 19735// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 { 19736// CHECK: ret <4 x i16> %a 19737uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { 19738 return vreinterpret_u16_s16(a); 19739} 19740 19741// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 { 19742// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 19743// CHECK: ret <4 x i16> [[TMP0]] 19744uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { 19745 return vreinterpret_u16_s32(a); 19746} 19747 19748// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 { 19749// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 19750// CHECK: ret <4 x i16> [[TMP0]] 19751uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { 19752 return vreinterpret_u16_s64(a); 19753} 19754 19755// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 { 19756// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 19757// CHECK: ret <4 x i16> [[TMP0]] 19758uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { 19759 return vreinterpret_u16_u8(a); 19760} 19761 19762// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 { 19763// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 19764// CHECK: ret <4 x i16> [[TMP0]] 19765uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { 19766 return vreinterpret_u16_u32(a); 19767} 19768 19769// CHECK-LABEL: define 
<4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// NOTE(review): auto-generated vreinterpret coverage. Each test is a pure
// reinterpret-cast intrinsic that lowers to a single IR bitcast (or to a
// no-op `ret %a` when source and destination IR vector types already match).
// The CHECK lines pin the exact generated IR, so the C bodies must not change.
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// u16 <-> p16 share the <4 x i16> IR type, so this is a no-op.
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return
vreinterpret_u64_s8(a);
}
// NOTE(review): continuation of the auto-generated vreinterpret bitcast
// tests (u64 and f16 destination groups); CHECK lines pin the exact IR.

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// u64 <-> s64 share the <1 x i64> IR type, so this is a no-op.
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK:
ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}
// NOTE(review): continuation of the auto-generated vreinterpret bitcast
// tests (f16 tail, f32 and f64 destination groups); CHECK lines pin the IR.

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

//
CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}
// NOTE(review): continuation of the auto-generated vreinterpret bitcast
// tests (f64 tail, p8 and p16 destination groups); CHECK lines pin the IR.

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}

// p8 <-> s8 share the <8 x i8> IR type, so this is a no-op.
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: define <4 x
i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}
// NOTE(review): continuation of the auto-generated vreinterpret bitcast
// tests (p64 group, then the 128-bit vreinterpretq_s8 group).

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// p64 <-> s64 share the <1 x i64> IR type, so this is a no-op.
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8>
@test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}
// NOTE(review): continuation of the auto-generated 128-bit vreinterpretq
// bitcast tests (s8 tail, s16 group, start of s32 group).

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// s8 <-> p8 share the <16 x i8> IR type, so this is a no-op.
// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL:
define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 { 20722// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 20723// CHECK: ret <4 x i32> [[TMP0]] 20724int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) { 20725 return vreinterpretq_s32_s16(a); 20726} 20727 20728// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 { 20729// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 20730// CHECK: ret <4 x i32> [[TMP0]] 20731int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) { 20732 return vreinterpretq_s32_s64(a); 20733} 20734 20735// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 { 20736// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> 20737// CHECK: ret <4 x i32> [[TMP0]] 20738int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) { 20739 return vreinterpretq_s32_u8(a); 20740} 20741 20742// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 { 20743// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 20744// CHECK: ret <4 x i32> [[TMP0]] 20745int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) { 20746 return vreinterpretq_s32_u16(a); 20747} 20748 20749// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 { 20750// CHECK: ret <4 x i32> %a 20751int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) { 20752 return vreinterpretq_s32_u32(a); 20753} 20754 20755// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 { 20756// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 20757// CHECK: ret <4 x i32> [[TMP0]] 20758int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) { 20759 return vreinterpretq_s32_u64(a); 20760} 20761 20762// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 { 20763// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> 20764// CHECK: ret <4 x i32> [[TMP0]] 20765int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) { 20766 return vreinterpretq_s32_f16(a); 20767} 20768 20769// 
CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 { 20770// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> 20771// CHECK: ret <4 x i32> [[TMP0]] 20772int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) { 20773 return vreinterpretq_s32_f32(a); 20774} 20775 20776// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f64(<2 x double> %a) #0 { 20777// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32> 20778// CHECK: ret <4 x i32> [[TMP0]] 20779int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) { 20780 return vreinterpretq_s32_f64(a); 20781} 20782 20783// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 { 20784// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> 20785// CHECK: ret <4 x i32> [[TMP0]] 20786int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) { 20787 return vreinterpretq_s32_p8(a); 20788} 20789 20790// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 { 20791// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 20792// CHECK: ret <4 x i32> [[TMP0]] 20793int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) { 20794 return vreinterpretq_s32_p16(a); 20795} 20796 20797// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p64(<2 x i64> %a) #0 { 20798// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 20799// CHECK: ret <4 x i32> [[TMP0]] 20800int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) { 20801 return vreinterpretq_s32_p64(a); 20802} 20803 20804// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 { 20805// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> 20806// CHECK: ret <2 x i64> [[TMP0]] 20807int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) { 20808 return vreinterpretq_s64_s8(a); 20809} 20810 20811// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 { 20812// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> 20813// CHECK: ret <2 x i64> [[TMP0]] 20814int64x2_t 
test_vreinterpretq_s64_s16(int16x8_t a) { 20815 return vreinterpretq_s64_s16(a); 20816} 20817 20818// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 { 20819// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> 20820// CHECK: ret <2 x i64> [[TMP0]] 20821int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) { 20822 return vreinterpretq_s64_s32(a); 20823} 20824 20825// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 { 20826// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> 20827// CHECK: ret <2 x i64> [[TMP0]] 20828int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) { 20829 return vreinterpretq_s64_u8(a); 20830} 20831 20832// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 { 20833// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> 20834// CHECK: ret <2 x i64> [[TMP0]] 20835int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) { 20836 return vreinterpretq_s64_u16(a); 20837} 20838 20839// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 { 20840// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> 20841// CHECK: ret <2 x i64> [[TMP0]] 20842int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) { 20843 return vreinterpretq_s64_u32(a); 20844} 20845 20846// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 { 20847// CHECK: ret <2 x i64> %a 20848int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) { 20849 return vreinterpretq_s64_u64(a); 20850} 20851 20852// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 { 20853// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> 20854// CHECK: ret <2 x i64> [[TMP0]] 20855int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) { 20856 return vreinterpretq_s64_f16(a); 20857} 20858 20859// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 { 20860// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> 20861// CHECK: ret <2 x i64> [[TMP0]] 
20862int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) { 20863 return vreinterpretq_s64_f32(a); 20864} 20865 20866// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f64(<2 x double> %a) #0 { 20867// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> 20868// CHECK: ret <2 x i64> [[TMP0]] 20869int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) { 20870 return vreinterpretq_s64_f64(a); 20871} 20872 20873// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 { 20874// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> 20875// CHECK: ret <2 x i64> [[TMP0]] 20876int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) { 20877 return vreinterpretq_s64_p8(a); 20878} 20879 20880// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 { 20881// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> 20882// CHECK: ret <2 x i64> [[TMP0]] 20883int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) { 20884 return vreinterpretq_s64_p16(a); 20885} 20886 20887// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p64(<2 x i64> %a) #0 { 20888// CHECK: ret <2 x i64> %a 20889int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) { 20890 return vreinterpretq_s64_p64(a); 20891} 20892 20893// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 { 20894// CHECK: ret <16 x i8> %a 20895uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) { 20896 return vreinterpretq_u8_s8(a); 20897} 20898 20899// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 { 20900// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 20901// CHECK: ret <16 x i8> [[TMP0]] 20902uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) { 20903 return vreinterpretq_u8_s16(a); 20904} 20905 20906// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 { 20907// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 20908// CHECK: ret <16 x i8> [[TMP0]] 20909uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) { 20910 
return vreinterpretq_u8_s32(a); 20911} 20912 20913// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 { 20914// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 20915// CHECK: ret <16 x i8> [[TMP0]] 20916uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) { 20917 return vreinterpretq_u8_s64(a); 20918} 20919 20920// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 { 20921// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 20922// CHECK: ret <16 x i8> [[TMP0]] 20923uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) { 20924 return vreinterpretq_u8_u16(a); 20925} 20926 20927// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 { 20928// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 20929// CHECK: ret <16 x i8> [[TMP0]] 20930uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) { 20931 return vreinterpretq_u8_u32(a); 20932} 20933 20934// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 { 20935// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 20936// CHECK: ret <16 x i8> [[TMP0]] 20937uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) { 20938 return vreinterpretq_u8_u64(a); 20939} 20940 20941// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 { 20942// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> 20943// CHECK: ret <16 x i8> [[TMP0]] 20944uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) { 20945 return vreinterpretq_u8_f16(a); 20946} 20947 20948// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 { 20949// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 20950// CHECK: ret <16 x i8> [[TMP0]] 20951uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) { 20952 return vreinterpretq_u8_f32(a); 20953} 20954 20955// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f64(<2 x double> %a) #0 { 20956// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 20957// CHECK: ret <16 
x i8> [[TMP0]] 20958uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) { 20959 return vreinterpretq_u8_f64(a); 20960} 20961 20962// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 { 20963// CHECK: ret <16 x i8> %a 20964uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) { 20965 return vreinterpretq_u8_p8(a); 20966} 20967 20968// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 { 20969// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 20970// CHECK: ret <16 x i8> [[TMP0]] 20971uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) { 20972 return vreinterpretq_u8_p16(a); 20973} 20974 20975// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p64(<2 x i64> %a) #0 { 20976// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 20977// CHECK: ret <16 x i8> [[TMP0]] 20978uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) { 20979 return vreinterpretq_u8_p64(a); 20980} 20981 20982// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 { 20983// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> 20984// CHECK: ret <8 x i16> [[TMP0]] 20985uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) { 20986 return vreinterpretq_u16_s8(a); 20987} 20988 20989// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 { 20990// CHECK: ret <8 x i16> %a 20991uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) { 20992 return vreinterpretq_u16_s16(a); 20993} 20994 20995// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 { 20996// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> 20997// CHECK: ret <8 x i16> [[TMP0]] 20998uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) { 20999 return vreinterpretq_u16_s32(a); 21000} 21001 21002// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 { 21003// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> 21004// CHECK: ret <8 x i16> [[TMP0]] 21005uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) { 
21006 return vreinterpretq_u16_s64(a); 21007} 21008 21009// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 { 21010// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> 21011// CHECK: ret <8 x i16> [[TMP0]] 21012uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) { 21013 return vreinterpretq_u16_u8(a); 21014} 21015 21016// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 { 21017// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> 21018// CHECK: ret <8 x i16> [[TMP0]] 21019uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) { 21020 return vreinterpretq_u16_u32(a); 21021} 21022 21023// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 { 21024// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> 21025// CHECK: ret <8 x i16> [[TMP0]] 21026uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) { 21027 return vreinterpretq_u16_u64(a); 21028} 21029 21030// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 { 21031// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> 21032// CHECK: ret <8 x i16> [[TMP0]] 21033uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) { 21034 return vreinterpretq_u16_f16(a); 21035} 21036 21037// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 { 21038// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> 21039// CHECK: ret <8 x i16> [[TMP0]] 21040uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) { 21041 return vreinterpretq_u16_f32(a); 21042} 21043 21044// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f64(<2 x double> %a) #0 { 21045// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16> 21046// CHECK: ret <8 x i16> [[TMP0]] 21047uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) { 21048 return vreinterpretq_u16_f64(a); 21049} 21050 21051// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 { 21052// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> 
21053// CHECK: ret <8 x i16> [[TMP0]] 21054uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) { 21055 return vreinterpretq_u16_p8(a); 21056} 21057 21058// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 { 21059// CHECK: ret <8 x i16> %a 21060uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) { 21061 return vreinterpretq_u16_p16(a); 21062} 21063 21064// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p64(<2 x i64> %a) #0 { 21065// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> 21066// CHECK: ret <8 x i16> [[TMP0]] 21067uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) { 21068 return vreinterpretq_u16_p64(a); 21069} 21070 21071// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 { 21072// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> 21073// CHECK: ret <4 x i32> [[TMP0]] 21074uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) { 21075 return vreinterpretq_u32_s8(a); 21076} 21077 21078// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 { 21079// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 21080// CHECK: ret <4 x i32> [[TMP0]] 21081uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) { 21082 return vreinterpretq_u32_s16(a); 21083} 21084 21085// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 { 21086// CHECK: ret <4 x i32> %a 21087uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { 21088 return vreinterpretq_u32_s32(a); 21089} 21090 21091// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 { 21092// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 21093// CHECK: ret <4 x i32> [[TMP0]] 21094uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { 21095 return vreinterpretq_u32_s64(a); 21096} 21097 21098// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 { 21099// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> 21100// CHECK: ret <4 x i32> [[TMP0]] 21101uint32x4_t 
test_vreinterpretq_u32_u8(uint8x16_t a) { 21102 return vreinterpretq_u32_u8(a); 21103} 21104 21105// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 { 21106// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 21107// CHECK: ret <4 x i32> [[TMP0]] 21108uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { 21109 return vreinterpretq_u32_u16(a); 21110} 21111 21112// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 { 21113// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 21114// CHECK: ret <4 x i32> [[TMP0]] 21115uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { 21116 return vreinterpretq_u32_u64(a); 21117} 21118 21119// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 { 21120// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> 21121// CHECK: ret <4 x i32> [[TMP0]] 21122uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { 21123 return vreinterpretq_u32_f16(a); 21124} 21125 21126// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 { 21127// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> 21128// CHECK: ret <4 x i32> [[TMP0]] 21129uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { 21130 return vreinterpretq_u32_f32(a); 21131} 21132 21133// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f64(<2 x double> %a) #0 { 21134// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32> 21135// CHECK: ret <4 x i32> [[TMP0]] 21136uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) { 21137 return vreinterpretq_u32_f64(a); 21138} 21139 21140// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 { 21141// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> 21142// CHECK: ret <4 x i32> [[TMP0]] 21143uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) { 21144 return vreinterpretq_u32_p8(a); 21145} 21146 21147// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 { 21148// CHECK: 
[[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> 21149// CHECK: ret <4 x i32> [[TMP0]] 21150uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) { 21151 return vreinterpretq_u32_p16(a); 21152} 21153 21154// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p64(<2 x i64> %a) #0 { 21155// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> 21156// CHECK: ret <4 x i32> [[TMP0]] 21157uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) { 21158 return vreinterpretq_u32_p64(a); 21159} 21160 21161// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 { 21162// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> 21163// CHECK: ret <2 x i64> [[TMP0]] 21164uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) { 21165 return vreinterpretq_u64_s8(a); 21166} 21167 21168// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 { 21169// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> 21170// CHECK: ret <2 x i64> [[TMP0]] 21171uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) { 21172 return vreinterpretq_u64_s16(a); 21173} 21174 21175// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 { 21176// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> 21177// CHECK: ret <2 x i64> [[TMP0]] 21178uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) { 21179 return vreinterpretq_u64_s32(a); 21180} 21181 21182// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 { 21183// CHECK: ret <2 x i64> %a 21184uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) { 21185 return vreinterpretq_u64_s64(a); 21186} 21187 21188// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 { 21189// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> 21190// CHECK: ret <2 x i64> [[TMP0]] 21191uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) { 21192 return vreinterpretq_u64_u8(a); 21193} 21194 21195// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 { 21196// 
CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> 21197// CHECK: ret <2 x i64> [[TMP0]] 21198uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) { 21199 return vreinterpretq_u64_u16(a); 21200} 21201 21202// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 { 21203// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> 21204// CHECK: ret <2 x i64> [[TMP0]] 21205uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) { 21206 return vreinterpretq_u64_u32(a); 21207} 21208 21209// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 { 21210// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> 21211// CHECK: ret <2 x i64> [[TMP0]] 21212uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) { 21213 return vreinterpretq_u64_f16(a); 21214} 21215 21216// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 { 21217// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> 21218// CHECK: ret <2 x i64> [[TMP0]] 21219uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) { 21220 return vreinterpretq_u64_f32(a); 21221} 21222 21223// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f64(<2 x double> %a) #0 { 21224// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64> 21225// CHECK: ret <2 x i64> [[TMP0]] 21226uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) { 21227 return vreinterpretq_u64_f64(a); 21228} 21229 21230// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 { 21231// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> 21232// CHECK: ret <2 x i64> [[TMP0]] 21233uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) { 21234 return vreinterpretq_u64_p8(a); 21235} 21236 21237// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 { 21238// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> 21239// CHECK: ret <2 x i64> [[TMP0]] 21240uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) { 21241 return vreinterpretq_u64_p16(a); 21242} 
21243 21244// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p64(<2 x i64> %a) #0 { 21245// CHECK: ret <2 x i64> %a 21246uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) { 21247 return vreinterpretq_u64_p64(a); 21248} 21249 21250// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 { 21251// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> 21252// CHECK: ret <8 x half> [[TMP0]] 21253float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) { 21254 return vreinterpretq_f16_s8(a); 21255} 21256 21257// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 { 21258// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> 21259// CHECK: ret <8 x half> [[TMP0]] 21260float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) { 21261 return vreinterpretq_f16_s16(a); 21262} 21263 21264// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 { 21265// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> 21266// CHECK: ret <8 x half> [[TMP0]] 21267float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) { 21268 return vreinterpretq_f16_s32(a); 21269} 21270 21271// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 { 21272// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> 21273// CHECK: ret <8 x half> [[TMP0]] 21274float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) { 21275 return vreinterpretq_f16_s64(a); 21276} 21277 21278// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 { 21279// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> 21280// CHECK: ret <8 x half> [[TMP0]] 21281float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) { 21282 return vreinterpretq_f16_u8(a); 21283} 21284 21285// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 { 21286// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> 21287// CHECK: ret <8 x half> [[TMP0]] 21288float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) { 21289 return 
vreinterpretq_f16_u16(a); 21290} 21291 21292// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 { 21293// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> 21294// CHECK: ret <8 x half> [[TMP0]] 21295float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) { 21296 return vreinterpretq_f16_u32(a); 21297} 21298 21299// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 { 21300// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> 21301// CHECK: ret <8 x half> [[TMP0]] 21302float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) { 21303 return vreinterpretq_f16_u64(a); 21304} 21305 21306// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 { 21307// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half> 21308// CHECK: ret <8 x half> [[TMP0]] 21309float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) { 21310 return vreinterpretq_f16_f32(a); 21311} 21312 21313// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f64(<2 x double> %a) #0 { 21314// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half> 21315// CHECK: ret <8 x half> [[TMP0]] 21316float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) { 21317 return vreinterpretq_f16_f64(a); 21318} 21319 21320// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 { 21321// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> 21322// CHECK: ret <8 x half> [[TMP0]] 21323float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) { 21324 return vreinterpretq_f16_p8(a); 21325} 21326 21327// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 { 21328// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> 21329// CHECK: ret <8 x half> [[TMP0]] 21330float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) { 21331 return vreinterpretq_f16_p16(a); 21332} 21333 21334// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p64(<2 x i64> %a) #0 { 21335// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to 
<8 x half> 21336// CHECK: ret <8 x half> [[TMP0]] 21337float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) { 21338 return vreinterpretq_f16_p64(a); 21339} 21340 21341// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 { 21342// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> 21343// CHECK: ret <4 x float> [[TMP0]] 21344float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) { 21345 return vreinterpretq_f32_s8(a); 21346} 21347 21348// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 { 21349// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> 21350// CHECK: ret <4 x float> [[TMP0]] 21351float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) { 21352 return vreinterpretq_f32_s16(a); 21353} 21354 21355// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 { 21356// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> 21357// CHECK: ret <4 x float> [[TMP0]] 21358float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) { 21359 return vreinterpretq_f32_s32(a); 21360} 21361 21362// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 { 21363// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> 21364// CHECK: ret <4 x float> [[TMP0]] 21365float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) { 21366 return vreinterpretq_f32_s64(a); 21367} 21368 21369// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 { 21370// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> 21371// CHECK: ret <4 x float> [[TMP0]] 21372float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) { 21373 return vreinterpretq_f32_u8(a); 21374} 21375 21376// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 { 21377// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> 21378// CHECK: ret <4 x float> [[TMP0]] 21379float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) { 21380 return vreinterpretq_f32_u16(a); 21381} 21382 21383// 
// NOTE(review): this is part of an auto-generated Clang CodeGen regression
// test (clang -emit-llvm | FileCheck, see the RUN lines at the top of the
// file). The "// CHECK:" lines are FileCheck directives that must match the
// IR emitted for the C function immediately following them — including the
// exact parameter names — so the code below is intentionally left untouched;
// only plain (non-CHECK) section comments are added.

// ---- vreinterpretq_f32_*: 128-bit reinterpret casts to float32x4_t.
// A same-size vreinterpret lowers to a single IR bitcast (or to nothing when
// the source and destination IR vector types already match).

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}

// ---- vreinterpretq_f64_*: 128-bit reinterpret casts to float64x2_t.

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}

// ---- vreinterpretq_p8_*: casts to poly8x16_t. Where the IR type is already
// <16 x i8> (s8/u8 sources) no bitcast is emitted and %a is returned directly.

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}

// ---- vreinterpretq_p16_*: casts to poly16x8_t (IR type <8 x i16>).

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}

// ---- vreinterpretq_p64_*: casts to poly64x2_t (IR type <2 x i64>).

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}

// ---- Scalar floating-point absolute difference (FABD, scalar SISD form).

// CHECK-LABEL: define float @test_vabds_f32(float %a, float %b) #0 {
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: define double @test_vabdd_f64(double %a, double %b) #0 {
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

// ---- Saturating signed+unsigned add (SUQADD / USQADD). Note the mixed
// signedness of the operands in each C signature; non-8-bit element widths go
// through <N x i8> round-trip bitcasts in the unoptimized IR.

// CHECK-LABEL: define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]]) #4
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]]) #4
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]]) #4
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]]) #4
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]]) #4
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]]) #4
// CHECK: ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]]) #4
// CHECK: ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

// ---- 64-bit scalar-vector abs / saturating abs / saturating negate / negate.

// CHECK-LABEL: define <1 x i64> @test_vabs_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]]) #4
// CHECK: ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vqabs_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]]) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vqneg_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]]) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vneg_s64(<1 x i64> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}

// ---- Floating-point across-vector reductions (add / max / min / maxnm / minnm).

// CHECK-LABEL: define float @test_vaddv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: define float @test_vaddvq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[TMP1]]) #4
// CHECK: ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: define double @test_vaddvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: define float @test_vmaxv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: define double @test_vmaxvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: define float @test_vminv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: define double @test_vminvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: define double @test_vmaxnmvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: define float @test_vmaxnmv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: define double @test_vminnmvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: define float @test_vminnmv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}

// ---- 64-bit pairwise add and integer across-vector add reductions.

// CHECK-LABEL: define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vpaddd_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: define i64 @test_vaddvq_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: define i64 @test_vaddvq_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}

// ---- float64x1_t arithmetic: these lower to plain IR fp instructions (or
// @llvm.fma for the fused forms) rather than AArch64-specific intrinsics.

// CHECK-LABEL: define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK: ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK: ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
// CHECK: ret <1 x double> [[TMP6]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// test_vfms_f64 continues past the end of this chunk; its CHECK lines are
// reproduced only up to the cut point.
// CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] =
bitcast <8 x i8> [[TMP0]] to <1 x double> 22141// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22142// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 22143// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4 22144// CHECK: ret <1 x double> [[TMP6]] 22145float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) { 22146 return vfms_f64(a, b, c); 22147} 22148 22149// CHECK-LABEL: define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) #0 { 22150// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b 22151// CHECK: ret <1 x double> [[SUB_I]] 22152float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) { 22153 return vsub_f64(a, b); 22154} 22155 22156// CHECK-LABEL: define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) #0 { 22157// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22158// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 22159// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22160// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22161// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]]) #4 22162// CHECK: ret <1 x double> [[VABD2_I]] 22163float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) { 22164 return vabd_f64(a, b); 22165} 22166 22167// CHECK-LABEL: define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) #0 { 22168// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22169// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 22170// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22171// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22172// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]]) #4 22173// CHECK: ret <1 x double> [[VMAX2_I]] 
22174float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) { 22175 return vmax_f64(a, b); 22176} 22177 22178// CHECK-LABEL: define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) #0 { 22179// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22180// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 22181// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22182// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22183// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]]) #4 22184// CHECK: ret <1 x double> [[VMIN2_I]] 22185float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) { 22186 return vmin_f64(a, b); 22187} 22188 22189// CHECK-LABEL: define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) #0 { 22190// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22191// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 22192// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22193// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22194// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]]) #4 22195// CHECK: ret <1 x double> [[VMAXNM2_I]] 22196float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) { 22197 return vmaxnm_f64(a, b); 22198} 22199 22200// CHECK-LABEL: define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) #0 { 22201// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22202// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 22203// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22204// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22205// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]]) #4 22206// CHECK: ret <1 x 
double> [[VMINNM2_I]] 22207float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) { 22208 return vminnm_f64(a, b); 22209} 22210 22211// CHECK-LABEL: define <1 x double> @test_vabs_f64(<1 x double> %a) #0 { 22212// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22213// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22214// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]]) #4 22215// CHECK: ret <1 x double> [[VABS1_I]] 22216float64x1_t test_vabs_f64(float64x1_t a) { 22217 return vabs_f64(a); 22218} 22219 22220// CHECK-LABEL: define <1 x double> @test_vneg_f64(<1 x double> %a) #0 { 22221// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a 22222// CHECK: ret <1 x double> [[SUB_I]] 22223float64x1_t test_vneg_f64(float64x1_t a) { 22224 return vneg_f64(a); 22225} 22226 22227// CHECK-LABEL: define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) #0 { 22228// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22229// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22230// CHECK: [[TMP2:%.*]] = fptosi <1 x double> [[TMP1]] to <1 x i64> 22231// CHECK: ret <1 x i64> [[TMP2]] 22232int64x1_t test_vcvt_s64_f64(float64x1_t a) { 22233 return vcvt_s64_f64(a); 22234} 22235 22236// CHECK-LABEL: define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) #0 { 22237// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22238// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22239// CHECK: [[TMP2:%.*]] = fptoui <1 x double> [[TMP1]] to <1 x i64> 22240// CHECK: ret <1 x i64> [[TMP2]] 22241uint64x1_t test_vcvt_u64_f64(float64x1_t a) { 22242 return vcvt_u64_f64(a); 22243} 22244 22245// CHECK-LABEL: define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) #0 { 22246// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22247// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22248// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> 
@llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4 22249// CHECK: ret <1 x i64> [[VCVTN1_I]] 22250int64x1_t test_vcvtn_s64_f64(float64x1_t a) { 22251 return vcvtn_s64_f64(a); 22252} 22253 22254// CHECK-LABEL: define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) #0 { 22255// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22256// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22257// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4 22258// CHECK: ret <1 x i64> [[VCVTN1_I]] 22259uint64x1_t test_vcvtn_u64_f64(float64x1_t a) { 22260 return vcvtn_u64_f64(a); 22261} 22262 22263// CHECK-LABEL: define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) #0 { 22264// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22265// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22266// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4 22267// CHECK: ret <1 x i64> [[VCVTP1_I]] 22268int64x1_t test_vcvtp_s64_f64(float64x1_t a) { 22269 return vcvtp_s64_f64(a); 22270} 22271 22272// CHECK-LABEL: define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) #0 { 22273// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22274// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22275// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4 22276// CHECK: ret <1 x i64> [[VCVTP1_I]] 22277uint64x1_t test_vcvtp_u64_f64(float64x1_t a) { 22278 return vcvtp_u64_f64(a); 22279} 22280 22281// CHECK-LABEL: define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) #0 { 22282// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22283// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22284// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4 22285// CHECK: ret <1 x i64> 
[[VCVTM1_I]] 22286int64x1_t test_vcvtm_s64_f64(float64x1_t a) { 22287 return vcvtm_s64_f64(a); 22288} 22289 22290// CHECK-LABEL: define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) #0 { 22291// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22292// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22293// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4 22294// CHECK: ret <1 x i64> [[VCVTM1_I]] 22295uint64x1_t test_vcvtm_u64_f64(float64x1_t a) { 22296 return vcvtm_u64_f64(a); 22297} 22298 22299// CHECK-LABEL: define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) #0 { 22300// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22301// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22302// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4 22303// CHECK: ret <1 x i64> [[VCVTA1_I]] 22304int64x1_t test_vcvta_s64_f64(float64x1_t a) { 22305 return vcvta_s64_f64(a); 22306} 22307 22308// CHECK-LABEL: define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) #0 { 22309// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22310// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22311// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4 22312// CHECK: ret <1 x i64> [[VCVTA1_I]] 22313uint64x1_t test_vcvta_u64_f64(float64x1_t a) { 22314 return vcvta_u64_f64(a); 22315} 22316 22317// CHECK-LABEL: define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) #0 { 22318// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 22319// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 22320// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double> 22321// CHECK: ret <1 x double> [[VCVT_I]] 22322float64x1_t test_vcvt_f64_s64(int64x1_t a) { 22323 return vcvt_f64_s64(a); 22324} 22325 22326// CHECK-LABEL: define <1 x double> 
@test_vcvt_f64_u64(<1 x i64> %a) #0 { 22327// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 22328// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 22329// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double> 22330// CHECK: ret <1 x double> [[VCVT_I]] 22331float64x1_t test_vcvt_f64_u64(uint64x1_t a) { 22332 return vcvt_f64_u64(a); 22333} 22334 22335// CHECK-LABEL: define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) #0 { 22336// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22337// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22338// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64) 22339// CHECK: ret <1 x i64> [[VCVT_N1]] 22340int64x1_t test_vcvt_n_s64_f64(float64x1_t a) { 22341 return vcvt_n_s64_f64(a, 64); 22342} 22343 22344// CHECK-LABEL: define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) #0 { 22345// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22346// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22347// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64) 22348// CHECK: ret <1 x i64> [[VCVT_N1]] 22349uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) { 22350 return vcvt_n_u64_f64(a, 64); 22351} 22352 22353// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) #0 { 22354// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 22355// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 22356// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64) 22357// CHECK: ret <1 x double> [[VCVT_N1]] 22358float64x1_t test_vcvt_n_f64_s64(int64x1_t a) { 22359 return vcvt_n_f64_s64(a, 64); 22360} 22361 22362// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) #0 { 22363// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 22364// CHECK: 
[[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 22365// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64) 22366// CHECK: ret <1 x double> [[VCVT_N1]] 22367float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) { 22368 return vcvt_n_f64_u64(a, 64); 22369} 22370 22371// CHECK-LABEL: define <1 x double> @test_vrndn_f64(<1 x double> %a) #0 { 22372// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22373// CHECK: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22374// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> [[VRNDN_I]]) #4 22375// CHECK: ret <1 x double> [[VRNDN1_I]] 22376float64x1_t test_vrndn_f64(float64x1_t a) { 22377 return vrndn_f64(a); 22378} 22379 22380// CHECK-LABEL: define <1 x double> @test_vrnda_f64(<1 x double> %a) #0 { 22381// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22382// CHECK: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22383// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]]) #4 22384// CHECK: ret <1 x double> [[VRNDA1_I]] 22385float64x1_t test_vrnda_f64(float64x1_t a) { 22386 return vrnda_f64(a); 22387} 22388 22389// CHECK-LABEL: define <1 x double> @test_vrndp_f64(<1 x double> %a) #0 { 22390// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22391// CHECK: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22392// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]]) #4 22393// CHECK: ret <1 x double> [[VRNDP1_I]] 22394float64x1_t test_vrndp_f64(float64x1_t a) { 22395 return vrndp_f64(a); 22396} 22397 22398// CHECK-LABEL: define <1 x double> @test_vrndm_f64(<1 x double> %a) #0 { 22399// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22400// CHECK: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22401// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> 
[[VRNDM_I]]) #4 22402// CHECK: ret <1 x double> [[VRNDM1_I]] 22403float64x1_t test_vrndm_f64(float64x1_t a) { 22404 return vrndm_f64(a); 22405} 22406 22407// CHECK-LABEL: define <1 x double> @test_vrndx_f64(<1 x double> %a) #0 { 22408// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22409// CHECK: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22410// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]]) #4 22411// CHECK: ret <1 x double> [[VRNDX1_I]] 22412float64x1_t test_vrndx_f64(float64x1_t a) { 22413 return vrndx_f64(a); 22414} 22415 22416// CHECK-LABEL: define <1 x double> @test_vrnd_f64(<1 x double> %a) #0 { 22417// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22418// CHECK: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22419// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]]) #4 22420// CHECK: ret <1 x double> [[VRNDZ1_I]] 22421float64x1_t test_vrnd_f64(float64x1_t a) { 22422 return vrnd_f64(a); 22423} 22424 22425// CHECK-LABEL: define <1 x double> @test_vrndi_f64(<1 x double> %a) #0 { 22426// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22427// CHECK: [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22428// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_I]]) #4 22429// CHECK: ret <1 x double> [[VRNDI1_I]] 22430float64x1_t test_vrndi_f64(float64x1_t a) { 22431 return vrndi_f64(a); 22432} 22433 22434// CHECK-LABEL: define <1 x double> @test_vrsqrte_f64(<1 x double> %a) #0 { 22435// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22436// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22437// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]]) #4 22438// CHECK: ret <1 x double> [[VRSQRTE_V1_I]] 22439float64x1_t test_vrsqrte_f64(float64x1_t a) { 22440 return vrsqrte_f64(a); 22441} 22442 
22443// CHECK-LABEL: define <1 x double> @test_vrecpe_f64(<1 x double> %a) #0 { 22444// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22445// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22446// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]]) #4 22447// CHECK: ret <1 x double> [[VRECPE_V1_I]] 22448float64x1_t test_vrecpe_f64(float64x1_t a) { 22449 return vrecpe_f64(a); 22450} 22451 22452// CHECK-LABEL: define <1 x double> @test_vsqrt_f64(<1 x double> %a) #0 { 22453// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22454// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22455// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP1]]) #4 22456// CHECK: ret <1 x double> [[VSQRT_I]] 22457float64x1_t test_vsqrt_f64(float64x1_t a) { 22458 return vsqrt_f64(a); 22459} 22460 22461// CHECK-LABEL: define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) #0 { 22462// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22463// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 22464// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22465// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22466// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]]) #4 22467// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8> 22468// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <1 x double> 22469// CHECK: ret <1 x double> [[TMP2]] 22470float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) { 22471 return vrecps_f64(a, b); 22472} 22473 22474// CHECK-LABEL: define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) #0 { 22475// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 22476// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 
x i8> 22477// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 22478// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 22479// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]]) #4 22480// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8> 22481// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <1 x double> 22482// CHECK: ret <1 x double> [[TMP2]] 22483float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) { 22484 return vrsqrts_f64(a, b); 22485} 22486 22487// CHECK-LABEL: define i32 @test_vminv_s32(<2 x i32> %a) #0 { 22488// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22489// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22490// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[TMP1]]) #4 22491// CHECK: ret i32 [[VMINV_S32_I]] 22492int32_t test_vminv_s32(int32x2_t a) { 22493 return vminv_s32(a); 22494} 22495 22496// CHECK-LABEL: define i32 @test_vminv_u32(<2 x i32> %a) #0 { 22497// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22498// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22499// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[TMP1]]) #4 22500// CHECK: ret i32 [[VMINV_U32_I]] 22501uint32_t test_vminv_u32(uint32x2_t a) { 22502 return vminv_u32(a); 22503} 22504 22505// CHECK-LABEL: define i32 @test_vmaxv_s32(<2 x i32> %a) #0 { 22506// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22507// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22508// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4 22509// CHECK: ret i32 [[VMAXV_S32_I]] 22510int32_t test_vmaxv_s32(int32x2_t a) { 22511 return vmaxv_s32(a); 22512} 22513 22514// CHECK-LABEL: define i32 @test_vmaxv_u32(<2 x i32> %a) #0 { 22515// CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22516// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22517// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4 22518// CHECK: ret i32 [[VMAXV_U32_I]] 22519uint32_t test_vmaxv_u32(uint32x2_t a) { 22520 return vmaxv_u32(a); 22521} 22522 22523// CHECK-LABEL: define i32 @test_vaddv_s32(<2 x i32> %a) #0 { 22524// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22525// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22526// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[TMP1]]) #4 22527// CHECK: ret i32 [[VADDV_S32_I]] 22528int32_t test_vaddv_s32(int32x2_t a) { 22529 return vaddv_s32(a); 22530} 22531 22532// CHECK-LABEL: define i32 @test_vaddv_u32(<2 x i32> %a) #0 { 22533// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22534// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22535// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[TMP1]]) #4 22536// CHECK: ret i32 [[VADDV_U32_I]] 22537uint32_t test_vaddv_u32(uint32x2_t a) { 22538 return vaddv_u32(a); 22539} 22540 22541// CHECK-LABEL: define i64 @test_vaddlv_s32(<2 x i32> %a) #0 { 22542// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22543// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22544// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4 22545// CHECK: ret i64 [[VADDLV_S32_I]] 22546int64_t test_vaddlv_s32(int32x2_t a) { 22547 return vaddlv_s32(a); 22548} 22549 22550// CHECK-LABEL: define i64 @test_vaddlv_u32(<2 x i32> %a) #0 { 22551// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22552// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22553// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4 22554// CHECK: ret i64 [[VADDLV_U32_I]] 22555uint64_t 
test_vaddlv_u32(uint32x2_t a) { 22556 return vaddlv_u32(a); 22557} 22558